Coverage Report

Created: 2026-01-20 07:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/enc_transforms-inl.h
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/base/compiler_specific.h"
7
#include "lib/jxl/frame_dimensions.h"
8
9
#if defined(LIB_JXL_ENC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
10
#ifdef LIB_JXL_ENC_TRANSFORMS_INL_H_
11
#undef LIB_JXL_ENC_TRANSFORMS_INL_H_
12
#else
13
#define LIB_JXL_ENC_TRANSFORMS_INL_H_
14
#endif
15
16
#include <cstddef>
17
#include <cstdint>
18
#include <hwy/highway.h>
19
20
#include "lib/jxl/ac_strategy.h"
21
#include "lib/jxl/dct-inl.h"
22
#include "lib/jxl/dct_scales.h"
23
24
HWY_BEFORE_NAMESPACE();
25
namespace jxl {
26
27
enum class AcStrategyType : uint32_t;
28
29
namespace HWY_NAMESPACE {
30
namespace {
31
32
constexpr size_t kMaxBlocks = 32;
33
34
// Inverse of ReinterpretingDCT.
35
template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
36
          size_t ROWS, size_t COLS>
37
HWY_INLINE void ReinterpretingIDCT(const float* input,
38
                                   const size_t input_stride, float* output,
39
3.02M
                                   const size_t output_stride, float* scratch) {
40
3.02M
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
3.02M
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
3.02M
  float* block = scratch;
43
3.02M
  if (ROWS < COLS) {
44
2.90M
    for (size_t y = 0; y < LF_ROWS; y++) {
45
6.62M
      for (size_t x = 0; x < LF_COLS; x++) {
46
4.99M
        block[y * COLS + x] = input[y * input_stride + x] *
47
4.99M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
4.99M
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
4.99M
      }
50
1.63M
    }
51
1.75M
  } else {
52
5.97M
    for (size_t y = 0; y < LF_COLS; y++) {
53
20.9M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
16.7M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
16.7M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
16.7M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
16.7M
      }
58
4.22M
    }
59
1.75M
  }
60
61
3.02M
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
3.02M
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
3.02M
                                  scratch_space);
64
3.02M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
328k
                                   const size_t output_stride, float* scratch) {
40
328k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
328k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
328k
  float* block = scratch;
43
328k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
328k
  } else {
52
656k
    for (size_t y = 0; y < LF_COLS; y++) {
53
985k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
656k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
656k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
656k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
656k
      }
58
328k
    }
59
328k
  }
60
61
328k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
328k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
328k
                                  scratch_space);
64
328k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
473k
                                   const size_t output_stride, float* scratch) {
40
473k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
473k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
473k
  float* block = scratch;
43
473k
  if (ROWS < COLS) {
44
946k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
1.41M
      for (size_t x = 0; x < LF_COLS; x++) {
46
946k
        block[y * COLS + x] = input[y * input_stride + x] *
47
946k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
946k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
946k
      }
50
473k
    }
51
473k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
473k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
473k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
473k
                                  scratch_space);
64
473k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
195k
                                   const size_t output_stride, float* scratch) {
40
195k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
195k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
195k
  float* block = scratch;
43
195k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
195k
  } else {
52
586k
    for (size_t y = 0; y < LF_COLS; y++) {
53
1.17M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
781k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
781k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
781k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
781k
      }
58
390k
    }
59
195k
  }
60
61
195k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
195k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
195k
                                  scratch_space);
64
195k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
92.9k
                                   const size_t output_stride, float* scratch) {
40
92.9k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
92.9k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
92.9k
  float* block = scratch;
43
92.9k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
92.9k
  } else {
52
278k
    for (size_t y = 0; y < LF_COLS; y++) {
53
929k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
743k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
743k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
743k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
743k
      }
58
185k
    }
59
92.9k
  }
60
61
92.9k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
92.9k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
92.9k
                                  scratch_space);
64
92.9k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
149k
                                   const size_t output_stride, float* scratch) {
40
149k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
149k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
149k
  float* block = scratch;
43
149k
  if (ROWS < COLS) {
44
447k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
1.49M
      for (size_t x = 0; x < LF_COLS; x++) {
46
1.19M
        block[y * COLS + x] = input[y * input_stride + x] *
47
1.19M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
1.19M
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
1.19M
      }
50
298k
    }
51
149k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
149k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
149k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
149k
                                  scratch_space);
64
149k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
215k
                                   const size_t output_stride, float* scratch) {
40
215k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
215k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
215k
  float* block = scratch;
43
215k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
215k
  } else {
52
1.07M
    for (size_t y = 0; y < LF_COLS; y++) {
53
4.31M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
3.45M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
3.45M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
3.45M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
3.45M
      }
58
862k
    }
59
215k
  }
60
61
215k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
215k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
215k
                                  scratch_space);
64
215k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
6.54k
                                   const size_t output_stride, float* scratch) {
40
6.54k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
6.54k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
6.54k
  float* block = scratch;
43
6.54k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
6.54k
  } else {
52
32.7k
    for (size_t y = 0; y < LF_COLS; y++) {
53
235k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
209k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
209k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
209k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
209k
      }
58
26.1k
    }
59
6.54k
  }
60
61
6.54k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
6.54k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
6.54k
                                  scratch_space);
64
6.54k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
11.1k
                                   const size_t output_stride, float* scratch) {
40
11.1k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
11.1k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
11.1k
  float* block = scratch;
43
11.1k
  if (ROWS < COLS) {
44
55.9k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
402k
      for (size_t x = 0; x < LF_COLS; x++) {
46
357k
        block[y * COLS + x] = input[y * input_stride + x] *
47
357k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
357k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
357k
      }
50
44.7k
    }
51
11.1k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
11.1k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
11.1k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
11.1k
                                  scratch_space);
64
11.1k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
39.5k
                                   const size_t output_stride, float* scratch) {
40
39.5k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
39.5k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
39.5k
  float* block = scratch;
43
39.5k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
39.5k
  } else {
52
355k
    for (size_t y = 0; y < LF_COLS; y++) {
53
2.84M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
2.52M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
2.52M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
2.52M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
2.52M
      }
58
316k
    }
59
39.5k
  }
60
61
39.5k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
39.5k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
39.5k
                                  scratch_space);
64
39.5k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
328k
                                   const size_t output_stride, float* scratch) {
40
328k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
328k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
328k
  float* block = scratch;
43
328k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
328k
  } else {
52
656k
    for (size_t y = 0; y < LF_COLS; y++) {
53
985k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
656k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
656k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
656k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
656k
      }
58
328k
    }
59
328k
  }
60
61
328k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
328k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
328k
                                  scratch_space);
64
328k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
473k
                                   const size_t output_stride, float* scratch) {
40
473k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
473k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
473k
  float* block = scratch;
43
473k
  if (ROWS < COLS) {
44
946k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
1.41M
      for (size_t x = 0; x < LF_COLS; x++) {
46
946k
        block[y * COLS + x] = input[y * input_stride + x] *
47
946k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
946k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
946k
      }
50
473k
    }
51
473k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
473k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
473k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
473k
                                  scratch_space);
64
473k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
195k
                                   const size_t output_stride, float* scratch) {
40
195k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
195k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
195k
  float* block = scratch;
43
195k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
195k
  } else {
52
586k
    for (size_t y = 0; y < LF_COLS; y++) {
53
1.17M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
781k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
781k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
781k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
781k
      }
58
390k
    }
59
195k
  }
60
61
195k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
195k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
195k
                                  scratch_space);
64
195k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
92.9k
                                   const size_t output_stride, float* scratch) {
40
92.9k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
92.9k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
92.9k
  float* block = scratch;
43
92.9k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
92.9k
  } else {
52
278k
    for (size_t y = 0; y < LF_COLS; y++) {
53
929k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
743k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
743k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
743k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
743k
      }
58
185k
    }
59
92.9k
  }
60
61
92.9k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
92.9k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
92.9k
                                  scratch_space);
64
92.9k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
149k
                                   const size_t output_stride, float* scratch) {
40
149k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
149k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
149k
  float* block = scratch;
43
149k
  if (ROWS < COLS) {
44
447k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
1.49M
      for (size_t x = 0; x < LF_COLS; x++) {
46
1.19M
        block[y * COLS + x] = input[y * input_stride + x] *
47
1.19M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
1.19M
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
1.19M
      }
50
298k
    }
51
149k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
149k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
149k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
149k
                                  scratch_space);
64
149k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
215k
                                   const size_t output_stride, float* scratch) {
40
215k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
215k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
215k
  float* block = scratch;
43
215k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
215k
  } else {
52
1.07M
    for (size_t y = 0; y < LF_COLS; y++) {
53
4.31M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
3.45M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
3.45M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
3.45M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
3.45M
      }
58
862k
    }
59
215k
  }
60
61
215k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
215k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
215k
                                  scratch_space);
64
215k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
6.54k
                                   const size_t output_stride, float* scratch) {
40
6.54k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
6.54k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
6.54k
  float* block = scratch;
43
6.54k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
6.54k
  } else {
52
32.7k
    for (size_t y = 0; y < LF_COLS; y++) {
53
235k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
209k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
209k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
209k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
209k
      }
58
26.1k
    }
59
6.54k
  }
60
61
6.54k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
6.54k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
6.54k
                                  scratch_space);
64
6.54k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
11.1k
                                   const size_t output_stride, float* scratch) {
40
11.1k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
11.1k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
11.1k
  float* block = scratch;
43
11.1k
  if (ROWS < COLS) {
44
55.9k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
402k
      for (size_t x = 0; x < LF_COLS; x++) {
46
357k
        block[y * COLS + x] = input[y * input_stride + x] *
47
357k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
357k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
357k
      }
50
44.7k
    }
51
11.1k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
11.1k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
11.1k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
11.1k
                                  scratch_space);
64
11.1k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
39.5k
                                   const size_t output_stride, float* scratch) {
40
39.5k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
39.5k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
39.5k
  float* block = scratch;
43
39.5k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
39.5k
  } else {
52
355k
    for (size_t y = 0; y < LF_COLS; y++) {
53
2.84M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
2.52M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
2.52M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
2.52M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
2.52M
      }
58
316k
    }
59
39.5k
  }
60
61
39.5k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
39.5k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
39.5k
                                  scratch_space);
64
39.5k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
65
66
template <size_t S>
67
63.3M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
63.3M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
63.3M
  static_assert(S % 2 == 0, "S should be even");
70
63.3M
  float temp[kDCTBlockSize];
71
63.3M
  constexpr size_t num_2x2 = S / 2;
72
211M
  for (size_t y = 0; y < num_2x2; y++) {
73
590M
    for (size_t x = 0; x < num_2x2; x++) {
74
443M
      float c00 = block[y * 2 * stride + x * 2];
75
443M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
443M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
443M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
443M
      float r00 = c00 + c01 + c10 + c11;
79
443M
      float r01 = c00 + c01 - c10 - c11;
80
443M
      float r10 = c00 - c01 + c10 - c11;
81
443M
      float r11 = c00 - c01 - c10 + c11;
82
443M
      r00 *= 0.25f;
83
443M
      r01 *= 0.25f;
84
443M
      r10 *= 0.25f;
85
443M
      r11 *= 0.25f;
86
443M
      temp[y * kBlockDim + x] = r00;
87
443M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
443M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
443M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
443M
    }
91
147M
  }
92
358M
  for (size_t y = 0; y < S; y++) {
93
2.06G
    for (size_t x = 0; x < S; x++) {
94
1.77G
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
1.77G
    }
96
295M
  }
97
63.3M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.14M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.14M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.14M
  static_assert(S % 2 == 0, "S should be even");
70
1.14M
  float temp[kDCTBlockSize];
71
1.14M
  constexpr size_t num_2x2 = S / 2;
72
5.72M
  for (size_t y = 0; y < num_2x2; y++) {
73
22.8M
    for (size_t x = 0; x < num_2x2; x++) {
74
18.3M
      float c00 = block[y * 2 * stride + x * 2];
75
18.3M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
18.3M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
18.3M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
18.3M
      float r00 = c00 + c01 + c10 + c11;
79
18.3M
      float r01 = c00 + c01 - c10 - c11;
80
18.3M
      float r10 = c00 - c01 + c10 - c11;
81
18.3M
      float r11 = c00 - c01 - c10 + c11;
82
18.3M
      r00 *= 0.25f;
83
18.3M
      r01 *= 0.25f;
84
18.3M
      r10 *= 0.25f;
85
18.3M
      r11 *= 0.25f;
86
18.3M
      temp[y * kBlockDim + x] = r00;
87
18.3M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
18.3M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
18.3M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
18.3M
    }
91
4.57M
  }
92
10.2M
  for (size_t y = 0; y < S; y++) {
93
82.3M
    for (size_t x = 0; x < S; x++) {
94
73.2M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
73.2M
    }
96
9.15M
  }
97
1.14M
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.14M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.14M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.14M
  static_assert(S % 2 == 0, "S should be even");
70
1.14M
  float temp[kDCTBlockSize];
71
1.14M
  constexpr size_t num_2x2 = S / 2;
72
3.43M
  for (size_t y = 0; y < num_2x2; y++) {
73
6.86M
    for (size_t x = 0; x < num_2x2; x++) {
74
4.57M
      float c00 = block[y * 2 * stride + x * 2];
75
4.57M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
4.57M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
4.57M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
4.57M
      float r00 = c00 + c01 + c10 + c11;
79
4.57M
      float r01 = c00 + c01 - c10 - c11;
80
4.57M
      float r10 = c00 - c01 + c10 - c11;
81
4.57M
      float r11 = c00 - c01 - c10 + c11;
82
4.57M
      r00 *= 0.25f;
83
4.57M
      r01 *= 0.25f;
84
4.57M
      r10 *= 0.25f;
85
4.57M
      r11 *= 0.25f;
86
4.57M
      temp[y * kBlockDim + x] = r00;
87
4.57M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
4.57M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
4.57M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
4.57M
    }
91
2.28M
  }
92
5.72M
  for (size_t y = 0; y < S; y++) {
93
22.8M
    for (size_t x = 0; x < S; x++) {
94
18.3M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
18.3M
    }
96
4.57M
  }
97
1.14M
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.14M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.14M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.14M
  static_assert(S % 2 == 0, "S should be even");
70
1.14M
  float temp[kDCTBlockSize];
71
1.14M
  constexpr size_t num_2x2 = S / 2;
72
2.28M
  for (size_t y = 0; y < num_2x2; y++) {
73
2.28M
    for (size_t x = 0; x < num_2x2; x++) {
74
1.14M
      float c00 = block[y * 2 * stride + x * 2];
75
1.14M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
1.14M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
1.14M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
1.14M
      float r00 = c00 + c01 + c10 + c11;
79
1.14M
      float r01 = c00 + c01 - c10 - c11;
80
1.14M
      float r10 = c00 - c01 + c10 - c11;
81
1.14M
      float r11 = c00 - c01 - c10 + c11;
82
1.14M
      r00 *= 0.25f;
83
1.14M
      r01 *= 0.25f;
84
1.14M
      r10 *= 0.25f;
85
1.14M
      r11 *= 0.25f;
86
1.14M
      temp[y * kBlockDim + x] = r00;
87
1.14M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
1.14M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
1.14M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
1.14M
    }
91
1.14M
  }
92
3.43M
  for (size_t y = 0; y < S; y++) {
93
6.86M
    for (size_t x = 0; x < S; x++) {
94
4.57M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
4.57M
    }
96
2.28M
  }
97
1.14M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
18.8M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
18.8M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
18.8M
  static_assert(S % 2 == 0, "S should be even");
70
18.8M
  float temp[kDCTBlockSize];
71
18.8M
  constexpr size_t num_2x2 = S / 2;
72
94.0M
  for (size_t y = 0; y < num_2x2; y++) {
73
376M
    for (size_t x = 0; x < num_2x2; x++) {
74
301M
      float c00 = block[y * 2 * stride + x * 2];
75
301M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
301M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
301M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
301M
      float r00 = c00 + c01 + c10 + c11;
79
301M
      float r01 = c00 + c01 - c10 - c11;
80
301M
      float r10 = c00 - c01 + c10 - c11;
81
301M
      float r11 = c00 - c01 - c10 + c11;
82
301M
      r00 *= 0.25f;
83
301M
      r01 *= 0.25f;
84
301M
      r10 *= 0.25f;
85
301M
      r11 *= 0.25f;
86
301M
      temp[y * kBlockDim + x] = r00;
87
301M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
301M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
301M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
301M
    }
91
75.2M
  }
92
169M
  for (size_t y = 0; y < S; y++) {
93
1.35G
    for (size_t x = 0; x < S; x++) {
94
1.20G
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
1.20G
    }
96
150M
  }
97
18.8M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
18.8M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
18.8M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
18.8M
  static_assert(S % 2 == 0, "S should be even");
70
18.8M
  float temp[kDCTBlockSize];
71
18.8M
  constexpr size_t num_2x2 = S / 2;
72
56.4M
  for (size_t y = 0; y < num_2x2; y++) {
73
112M
    for (size_t x = 0; x < num_2x2; x++) {
74
75.2M
      float c00 = block[y * 2 * stride + x * 2];
75
75.2M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
75.2M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
75.2M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
75.2M
      float r00 = c00 + c01 + c10 + c11;
79
75.2M
      float r01 = c00 + c01 - c10 - c11;
80
75.2M
      float r10 = c00 - c01 + c10 - c11;
81
75.2M
      float r11 = c00 - c01 - c10 + c11;
82
75.2M
      r00 *= 0.25f;
83
75.2M
      r01 *= 0.25f;
84
75.2M
      r10 *= 0.25f;
85
75.2M
      r11 *= 0.25f;
86
75.2M
      temp[y * kBlockDim + x] = r00;
87
75.2M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
75.2M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
75.2M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
75.2M
    }
91
37.6M
  }
92
94.0M
  for (size_t y = 0; y < S; y++) {
93
376M
    for (size_t x = 0; x < S; x++) {
94
301M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
301M
    }
96
75.2M
  }
97
18.8M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
18.8M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
18.8M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
18.8M
  static_assert(S % 2 == 0, "S should be even");
70
18.8M
  float temp[kDCTBlockSize];
71
18.8M
  constexpr size_t num_2x2 = S / 2;
72
37.6M
  for (size_t y = 0; y < num_2x2; y++) {
73
37.6M
    for (size_t x = 0; x < num_2x2; x++) {
74
18.8M
      float c00 = block[y * 2 * stride + x * 2];
75
18.8M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
18.8M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
18.8M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
18.8M
      float r00 = c00 + c01 + c10 + c11;
79
18.8M
      float r01 = c00 + c01 - c10 - c11;
80
18.8M
      float r10 = c00 - c01 + c10 - c11;
81
18.8M
      float r11 = c00 - c01 - c10 + c11;
82
18.8M
      r00 *= 0.25f;
83
18.8M
      r01 *= 0.25f;
84
18.8M
      r10 *= 0.25f;
85
18.8M
      r11 *= 0.25f;
86
18.8M
      temp[y * kBlockDim + x] = r00;
87
18.8M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
18.8M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
18.8M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
18.8M
    }
91
18.8M
  }
92
56.4M
  for (size_t y = 0; y < S; y++) {
93
112M
    for (size_t x = 0; x < S; x++) {
94
75.2M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
75.2M
    }
96
37.6M
  }
97
18.8M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.14M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.14M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.14M
  static_assert(S % 2 == 0, "S should be even");
70
1.14M
  float temp[kDCTBlockSize];
71
1.14M
  constexpr size_t num_2x2 = S / 2;
72
5.72M
  for (size_t y = 0; y < num_2x2; y++) {
73
22.8M
    for (size_t x = 0; x < num_2x2; x++) {
74
18.3M
      float c00 = block[y * 2 * stride + x * 2];
75
18.3M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
18.3M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
18.3M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
18.3M
      float r00 = c00 + c01 + c10 + c11;
79
18.3M
      float r01 = c00 + c01 - c10 - c11;
80
18.3M
      float r10 = c00 - c01 + c10 - c11;
81
18.3M
      float r11 = c00 - c01 - c10 + c11;
82
18.3M
      r00 *= 0.25f;
83
18.3M
      r01 *= 0.25f;
84
18.3M
      r10 *= 0.25f;
85
18.3M
      r11 *= 0.25f;
86
18.3M
      temp[y * kBlockDim + x] = r00;
87
18.3M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
18.3M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
18.3M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
18.3M
    }
91
4.57M
  }
92
10.2M
  for (size_t y = 0; y < S; y++) {
93
82.3M
    for (size_t x = 0; x < S; x++) {
94
73.2M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
73.2M
    }
96
9.15M
  }
97
1.14M
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.14M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.14M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.14M
  static_assert(S % 2 == 0, "S should be even");
70
1.14M
  float temp[kDCTBlockSize];
71
1.14M
  constexpr size_t num_2x2 = S / 2;
72
3.43M
  for (size_t y = 0; y < num_2x2; y++) {
73
6.86M
    for (size_t x = 0; x < num_2x2; x++) {
74
4.57M
      float c00 = block[y * 2 * stride + x * 2];
75
4.57M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
4.57M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
4.57M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
4.57M
      float r00 = c00 + c01 + c10 + c11;
79
4.57M
      float r01 = c00 + c01 - c10 - c11;
80
4.57M
      float r10 = c00 - c01 + c10 - c11;
81
4.57M
      float r11 = c00 - c01 - c10 + c11;
82
4.57M
      r00 *= 0.25f;
83
4.57M
      r01 *= 0.25f;
84
4.57M
      r10 *= 0.25f;
85
4.57M
      r11 *= 0.25f;
86
4.57M
      temp[y * kBlockDim + x] = r00;
87
4.57M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
4.57M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
4.57M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
4.57M
    }
91
2.28M
  }
92
5.72M
  for (size_t y = 0; y < S; y++) {
93
22.8M
    for (size_t x = 0; x < S; x++) {
94
18.3M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
18.3M
    }
96
4.57M
  }
97
1.14M
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.14M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.14M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.14M
  static_assert(S % 2 == 0, "S should be even");
70
1.14M
  float temp[kDCTBlockSize];
71
1.14M
  constexpr size_t num_2x2 = S / 2;
72
2.28M
  for (size_t y = 0; y < num_2x2; y++) {
73
2.28M
    for (size_t x = 0; x < num_2x2; x++) {
74
1.14M
      float c00 = block[y * 2 * stride + x * 2];
75
1.14M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
1.14M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
1.14M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
1.14M
      float r00 = c00 + c01 + c10 + c11;
79
1.14M
      float r01 = c00 + c01 - c10 - c11;
80
1.14M
      float r10 = c00 - c01 + c10 - c11;
81
1.14M
      float r11 = c00 - c01 - c10 + c11;
82
1.14M
      r00 *= 0.25f;
83
1.14M
      r01 *= 0.25f;
84
1.14M
      r10 *= 0.25f;
85
1.14M
      r11 *= 0.25f;
86
1.14M
      temp[y * kBlockDim + x] = r00;
87
1.14M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
1.14M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
1.14M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
1.14M
    }
91
1.14M
  }
92
3.43M
  for (size_t y = 0; y < S; y++) {
93
6.86M
    for (size_t x = 0; x < S; x++) {
94
4.57M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
4.57M
    }
96
2.28M
  }
97
1.14M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
98
99
78.4M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
78.4M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
78.4M
      {
102
78.4M
          0.2500000000000000,
103
78.4M
          0.8769029297991420f,
104
78.4M
          0.0000000000000000,
105
78.4M
          0.0000000000000000,
106
78.4M
          0.0000000000000000,
107
78.4M
          -0.4105377591765233f,
108
78.4M
          0.0000000000000000,
109
78.4M
          0.0000000000000000,
110
78.4M
          0.0000000000000000,
111
78.4M
          0.0000000000000000,
112
78.4M
          0.0000000000000000,
113
78.4M
          0.0000000000000000,
114
78.4M
          0.0000000000000000,
115
78.4M
          0.0000000000000000,
116
78.4M
          0.0000000000000000,
117
78.4M
          0.0000000000000000,
118
78.4M
      },
119
78.4M
      {
120
78.4M
          0.2500000000000000,
121
78.4M
          0.2206518106944235f,
122
78.4M
          0.0000000000000000,
123
78.4M
          0.0000000000000000,
124
78.4M
          -0.7071067811865474f,
125
78.4M
          0.6235485373547691f,
126
78.4M
          0.0000000000000000,
127
78.4M
          0.0000000000000000,
128
78.4M
          0.0000000000000000,
129
78.4M
          0.0000000000000000,
130
78.4M
          0.0000000000000000,
131
78.4M
          0.0000000000000000,
132
78.4M
          0.0000000000000000,
133
78.4M
          0.0000000000000000,
134
78.4M
          0.0000000000000000,
135
78.4M
          0.0000000000000000,
136
78.4M
      },
137
78.4M
      {
138
78.4M
          0.2500000000000000,
139
78.4M
          -0.1014005039375376f,
140
78.4M
          0.4067007583026075f,
141
78.4M
          -0.2125574805828875f,
142
78.4M
          0.0000000000000000,
143
78.4M
          -0.0643507165794627f,
144
78.4M
          -0.4517556589999482f,
145
78.4M
          -0.3046847507248690f,
146
78.4M
          0.3017929516615495f,
147
78.4M
          0.4082482904638627f,
148
78.4M
          0.1747866975480809f,
149
78.4M
          -0.2110560104933578f,
150
78.4M
          -0.1426608480880726f,
151
78.4M
          -0.1381354035075859f,
152
78.4M
          -0.1743760259965107f,
153
78.4M
          0.1135498731499434f,
154
78.4M
      },
155
78.4M
      {
156
78.4M
          0.2500000000000000,
157
78.4M
          -0.1014005039375375f,
158
78.4M
          0.4444481661973445f,
159
78.4M
          0.3085497062849767f,
160
78.4M
          0.0000000000000000f,
161
78.4M
          -0.0643507165794627f,
162
78.4M
          0.1585450355184006f,
163
78.4M
          0.5112616136591823f,
164
78.4M
          0.2579236279634118f,
165
78.4M
          0.0000000000000000,
166
78.4M
          0.0812611176717539f,
167
78.4M
          0.1856718091610980f,
168
78.4M
          -0.3416446842253372f,
169
78.4M
          0.3302282550303788f,
170
78.4M
          0.0702790691196284f,
171
78.4M
          -0.0741750459581035f,
172
78.4M
      },
173
78.4M
      {
174
78.4M
          0.2500000000000000,
175
78.4M
          0.2206518106944236f,
176
78.4M
          0.0000000000000000,
177
78.4M
          0.0000000000000000,
178
78.4M
          0.7071067811865476f,
179
78.4M
          0.6235485373547694f,
180
78.4M
          0.0000000000000000,
181
78.4M
          0.0000000000000000,
182
78.4M
          0.0000000000000000,
183
78.4M
          0.0000000000000000,
184
78.4M
          0.0000000000000000,
185
78.4M
          0.0000000000000000,
186
78.4M
          0.0000000000000000,
187
78.4M
          0.0000000000000000,
188
78.4M
          0.0000000000000000,
189
78.4M
          0.0000000000000000,
190
78.4M
      },
191
78.4M
      {
192
78.4M
          0.2500000000000000,
193
78.4M
          -0.1014005039375378f,
194
78.4M
          0.0000000000000000,
195
78.4M
          0.4706702258572536f,
196
78.4M
          0.0000000000000000,
197
78.4M
          -0.0643507165794628f,
198
78.4M
          -0.0403851516082220f,
199
78.4M
          0.0000000000000000,
200
78.4M
          0.1627234014286620f,
201
78.4M
          0.0000000000000000,
202
78.4M
          0.0000000000000000,
203
78.4M
          0.0000000000000000,
204
78.4M
          0.7367497537172237f,
205
78.4M
          0.0875511500058708f,
206
78.4M
          -0.2921026642334881f,
207
78.4M
          0.1940289303259434f,
208
78.4M
      },
209
78.4M
      {
210
78.4M
          0.2500000000000000,
211
78.4M
          -0.1014005039375377f,
212
78.4M
          0.1957439937204294f,
213
78.4M
          -0.1621205195722993f,
214
78.4M
          0.0000000000000000,
215
78.4M
          -0.0643507165794628f,
216
78.4M
          0.0074182263792424f,
217
78.4M
          -0.2904801297289980f,
218
78.4M
          0.0952002265347504f,
219
78.4M
          0.0000000000000000,
220
78.4M
          -0.3675398009862027f,
221
78.4M
          0.4921585901373873f,
222
78.4M
          0.2462710772207515f,
223
78.4M
          -0.0794670660590957f,
224
78.4M
          0.3623817333531167f,
225
78.4M
          -0.4351904965232280f,
226
78.4M
      },
227
78.4M
      {
228
78.4M
          0.2500000000000000,
229
78.4M
          -0.1014005039375376f,
230
78.4M
          0.2929100136981264f,
231
78.4M
          0.0000000000000000,
232
78.4M
          0.0000000000000000,
233
78.4M
          -0.0643507165794627f,
234
78.4M
          0.3935103426921017f,
235
78.4M
          -0.0657870154914280f,
236
78.4M
          0.0000000000000000,
237
78.4M
          -0.4082482904638628f,
238
78.4M
          -0.3078822139579090f,
239
78.4M
          -0.3852501370925192f,
240
78.4M
          -0.0857401903551931f,
241
78.4M
          -0.4613374887461511f,
242
78.4M
          0.0000000000000000,
243
78.4M
          0.2191868483885747f,
244
78.4M
      },
245
78.4M
      {
246
78.4M
          0.2500000000000000,
247
78.4M
          -0.1014005039375376f,
248
78.4M
          -0.4067007583026072f,
249
78.4M
          -0.2125574805828705f,
250
78.4M
          0.0000000000000000,
251
78.4M
          -0.0643507165794627f,
252
78.4M
          -0.4517556589999464f,
253
78.4M
          0.3046847507248840f,
254
78.4M
          0.3017929516615503f,
255
78.4M
          -0.4082482904638635f,
256
78.4M
          -0.1747866975480813f,
257
78.4M
          0.2110560104933581f,
258
78.4M
          -0.1426608480880734f,
259
78.4M
          -0.1381354035075829f,
260
78.4M
          -0.1743760259965108f,
261
78.4M
          0.1135498731499426f,
262
78.4M
      },
263
78.4M
      {
264
78.4M
          0.2500000000000000,
265
78.4M
          -0.1014005039375377f,
266
78.4M
          -0.1957439937204287f,
267
78.4M
          -0.1621205195722833f,
268
78.4M
          0.0000000000000000,
269
78.4M
          -0.0643507165794628f,
270
78.4M
          0.0074182263792444f,
271
78.4M
          0.2904801297290076f,
272
78.4M
          0.0952002265347505f,
273
78.4M
          0.0000000000000000,
274
78.4M
          0.3675398009862011f,
275
78.4M
          -0.4921585901373891f,
276
78.4M
          0.2462710772207514f,
277
78.4M
          -0.0794670660591026f,
278
78.4M
          0.3623817333531165f,
279
78.4M
          -0.4351904965232251f,
280
78.4M
      },
281
78.4M
      {
282
78.4M
          0.2500000000000000,
283
78.4M
          -0.1014005039375375f,
284
78.4M
          0.0000000000000000,
285
78.4M
          -0.4706702258572528f,
286
78.4M
          0.0000000000000000,
287
78.4M
          -0.0643507165794627f,
288
78.4M
          0.1107416575309343f,
289
78.4M
          0.0000000000000000,
290
78.4M
          -0.1627234014286617f,
291
78.4M
          0.0000000000000000,
292
78.4M
          0.0000000000000000,
293
78.4M
          0.0000000000000000,
294
78.4M
          0.1488339922711357f,
295
78.4M
          0.4972464710953509f,
296
78.4M
          0.2921026642334879f,
297
78.4M
          0.5550443808910661f,
298
78.4M
      },
299
78.4M
      {
300
78.4M
          0.2500000000000000,
301
78.4M
          -0.1014005039375377f,
302
78.4M
          0.1137907446044809f,
303
78.4M
          -0.1464291867126764f,
304
78.4M
          0.0000000000000000,
305
78.4M
          -0.0643507165794628f,
306
78.4M
          0.0829816309488205f,
307
78.4M
          -0.2388977352334460f,
308
78.4M
          -0.3531238544981630f,
309
78.4M
          -0.4082482904638630f,
310
78.4M
          0.4826689115059883f,
311
78.4M
          0.1741941265991622f,
312
78.4M
          -0.0476868035022925f,
313
78.4M
          0.1253805944856366f,
314
78.4M
          -0.4326608024727445f,
315
78.4M
          -0.2546827712406646f,
316
78.4M
      },
317
78.4M
      {
318
78.4M
          0.2500000000000000,
319
78.4M
          -0.1014005039375377f,
320
78.4M
          -0.4444481661973438f,
321
78.4M
          0.3085497062849487f,
322
78.4M
          0.0000000000000000,
323
78.4M
          -0.0643507165794628f,
324
78.4M
          0.1585450355183970f,
325
78.4M
          -0.5112616136592012f,
326
78.4M
          0.2579236279634129f,
327
78.4M
          0.0000000000000000,
328
78.4M
          -0.0812611176717504f,
329
78.4M
          -0.1856718091610990f,
330
78.4M
          -0.3416446842253373f,
331
78.4M
          0.3302282550303805f,
332
78.4M
          0.0702790691196282f,
333
78.4M
          -0.0741750459581023f,
334
78.4M
      },
335
78.4M
      {
336
78.4M
          0.2500000000000000,
337
78.4M
          -0.1014005039375376f,
338
78.4M
          -0.2929100136981264f,
339
78.4M
          0.0000000000000000,
340
78.4M
          0.0000000000000000,
341
78.4M
          -0.0643507165794627f,
342
78.4M
          0.3935103426921022f,
343
78.4M
          0.0657870154914254f,
344
78.4M
          0.0000000000000000,
345
78.4M
          0.4082482904638634f,
346
78.4M
          0.3078822139579031f,
347
78.4M
          0.3852501370925211f,
348
78.4M
          -0.0857401903551927f,
349
78.4M
          -0.4613374887461554f,
350
78.4M
          0.0000000000000000,
351
78.4M
          0.2191868483885728f,
352
78.4M
      },
353
78.4M
      {
354
78.4M
          0.2500000000000000,
355
78.4M
          -0.1014005039375376f,
356
78.4M
          -0.1137907446044814f,
357
78.4M
          -0.1464291867126654f,
358
78.4M
          0.0000000000000000,
359
78.4M
          -0.0643507165794627f,
360
78.4M
          0.0829816309488214f,
361
78.4M
          0.2388977352334547f,
362
78.4M
          -0.3531238544981624f,
363
78.4M
          0.4082482904638630f,
364
78.4M
          -0.4826689115059858f,
365
78.4M
          -0.1741941265991621f,
366
78.4M
          -0.0476868035022928f,
367
78.4M
          0.1253805944856431f,
368
78.4M
          -0.4326608024727457f,
369
78.4M
          -0.2546827712406641f,
370
78.4M
      },
371
78.4M
      {
372
78.4M
          0.2500000000000000,
373
78.4M
          -0.1014005039375374f,
374
78.4M
          0.0000000000000000,
375
78.4M
          0.4251149611657548f,
376
78.4M
          0.0000000000000000,
377
78.4M
          -0.0643507165794626f,
378
78.4M
          -0.4517556589999480f,
379
78.4M
          0.0000000000000000,
380
78.4M
          -0.6035859033230976f,
381
78.4M
          0.0000000000000000,
382
78.4M
          0.0000000000000000,
383
78.4M
          0.0000000000000000,
384
78.4M
          -0.1426608480880724f,
385
78.4M
          -0.1381354035075845f,
386
78.4M
          0.3487520519930227f,
387
78.4M
          0.1135498731499429f,
388
78.4M
      },
389
78.4M
  };
390
391
78.4M
  const HWY_CAPPED(float, 16) d;
392
235M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
156M
    auto scalar = Zero(d);
394
2.66G
    for (size_t j = 0; j < 16; j++) {
395
2.50G
      auto px = Set(d, pixels[j]);
396
2.50G
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
2.50G
      scalar = MulAdd(px, basis, scalar);
398
2.50G
    }
399
156M
    Store(scalar, d, coeffs + i);
400
156M
  }
401
78.4M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
1.57M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
1.57M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
1.57M
      {
102
1.57M
          0.2500000000000000,
103
1.57M
          0.8769029297991420f,
104
1.57M
          0.0000000000000000,
105
1.57M
          0.0000000000000000,
106
1.57M
          0.0000000000000000,
107
1.57M
          -0.4105377591765233f,
108
1.57M
          0.0000000000000000,
109
1.57M
          0.0000000000000000,
110
1.57M
          0.0000000000000000,
111
1.57M
          0.0000000000000000,
112
1.57M
          0.0000000000000000,
113
1.57M
          0.0000000000000000,
114
1.57M
          0.0000000000000000,
115
1.57M
          0.0000000000000000,
116
1.57M
          0.0000000000000000,
117
1.57M
          0.0000000000000000,
118
1.57M
      },
119
1.57M
      {
120
1.57M
          0.2500000000000000,
121
1.57M
          0.2206518106944235f,
122
1.57M
          0.0000000000000000,
123
1.57M
          0.0000000000000000,
124
1.57M
          -0.7071067811865474f,
125
1.57M
          0.6235485373547691f,
126
1.57M
          0.0000000000000000,
127
1.57M
          0.0000000000000000,
128
1.57M
          0.0000000000000000,
129
1.57M
          0.0000000000000000,
130
1.57M
          0.0000000000000000,
131
1.57M
          0.0000000000000000,
132
1.57M
          0.0000000000000000,
133
1.57M
          0.0000000000000000,
134
1.57M
          0.0000000000000000,
135
1.57M
          0.0000000000000000,
136
1.57M
      },
137
1.57M
      {
138
1.57M
          0.2500000000000000,
139
1.57M
          -0.1014005039375376f,
140
1.57M
          0.4067007583026075f,
141
1.57M
          -0.2125574805828875f,
142
1.57M
          0.0000000000000000,
143
1.57M
          -0.0643507165794627f,
144
1.57M
          -0.4517556589999482f,
145
1.57M
          -0.3046847507248690f,
146
1.57M
          0.3017929516615495f,
147
1.57M
          0.4082482904638627f,
148
1.57M
          0.1747866975480809f,
149
1.57M
          -0.2110560104933578f,
150
1.57M
          -0.1426608480880726f,
151
1.57M
          -0.1381354035075859f,
152
1.57M
          -0.1743760259965107f,
153
1.57M
          0.1135498731499434f,
154
1.57M
      },
155
1.57M
      {
156
1.57M
          0.2500000000000000,
157
1.57M
          -0.1014005039375375f,
158
1.57M
          0.4444481661973445f,
159
1.57M
          0.3085497062849767f,
160
1.57M
          0.0000000000000000f,
161
1.57M
          -0.0643507165794627f,
162
1.57M
          0.1585450355184006f,
163
1.57M
          0.5112616136591823f,
164
1.57M
          0.2579236279634118f,
165
1.57M
          0.0000000000000000,
166
1.57M
          0.0812611176717539f,
167
1.57M
          0.1856718091610980f,
168
1.57M
          -0.3416446842253372f,
169
1.57M
          0.3302282550303788f,
170
1.57M
          0.0702790691196284f,
171
1.57M
          -0.0741750459581035f,
172
1.57M
      },
173
1.57M
      {
174
1.57M
          0.2500000000000000,
175
1.57M
          0.2206518106944236f,
176
1.57M
          0.0000000000000000,
177
1.57M
          0.0000000000000000,
178
1.57M
          0.7071067811865476f,
179
1.57M
          0.6235485373547694f,
180
1.57M
          0.0000000000000000,
181
1.57M
          0.0000000000000000,
182
1.57M
          0.0000000000000000,
183
1.57M
          0.0000000000000000,
184
1.57M
          0.0000000000000000,
185
1.57M
          0.0000000000000000,
186
1.57M
          0.0000000000000000,
187
1.57M
          0.0000000000000000,
188
1.57M
          0.0000000000000000,
189
1.57M
          0.0000000000000000,
190
1.57M
      },
191
1.57M
      {
192
1.57M
          0.2500000000000000,
193
1.57M
          -0.1014005039375378f,
194
1.57M
          0.0000000000000000,
195
1.57M
          0.4706702258572536f,
196
1.57M
          0.0000000000000000,
197
1.57M
          -0.0643507165794628f,
198
1.57M
          -0.0403851516082220f,
199
1.57M
          0.0000000000000000,
200
1.57M
          0.1627234014286620f,
201
1.57M
          0.0000000000000000,
202
1.57M
          0.0000000000000000,
203
1.57M
          0.0000000000000000,
204
1.57M
          0.7367497537172237f,
205
1.57M
          0.0875511500058708f,
206
1.57M
          -0.2921026642334881f,
207
1.57M
          0.1940289303259434f,
208
1.57M
      },
209
1.57M
      {
210
1.57M
          0.2500000000000000,
211
1.57M
          -0.1014005039375377f,
212
1.57M
          0.1957439937204294f,
213
1.57M
          -0.1621205195722993f,
214
1.57M
          0.0000000000000000,
215
1.57M
          -0.0643507165794628f,
216
1.57M
          0.0074182263792424f,
217
1.57M
          -0.2904801297289980f,
218
1.57M
          0.0952002265347504f,
219
1.57M
          0.0000000000000000,
220
1.57M
          -0.3675398009862027f,
221
1.57M
          0.4921585901373873f,
222
1.57M
          0.2462710772207515f,
223
1.57M
          -0.0794670660590957f,
224
1.57M
          0.3623817333531167f,
225
1.57M
          -0.4351904965232280f,
226
1.57M
      },
227
1.57M
      {
228
1.57M
          0.2500000000000000,
229
1.57M
          -0.1014005039375376f,
230
1.57M
          0.2929100136981264f,
231
1.57M
          0.0000000000000000,
232
1.57M
          0.0000000000000000,
233
1.57M
          -0.0643507165794627f,
234
1.57M
          0.3935103426921017f,
235
1.57M
          -0.0657870154914280f,
236
1.57M
          0.0000000000000000,
237
1.57M
          -0.4082482904638628f,
238
1.57M
          -0.3078822139579090f,
239
1.57M
          -0.3852501370925192f,
240
1.57M
          -0.0857401903551931f,
241
1.57M
          -0.4613374887461511f,
242
1.57M
          0.0000000000000000,
243
1.57M
          0.2191868483885747f,
244
1.57M
      },
245
1.57M
      {
246
1.57M
          0.2500000000000000,
247
1.57M
          -0.1014005039375376f,
248
1.57M
          -0.4067007583026072f,
249
1.57M
          -0.2125574805828705f,
250
1.57M
          0.0000000000000000,
251
1.57M
          -0.0643507165794627f,
252
1.57M
          -0.4517556589999464f,
253
1.57M
          0.3046847507248840f,
254
1.57M
          0.3017929516615503f,
255
1.57M
          -0.4082482904638635f,
256
1.57M
          -0.1747866975480813f,
257
1.57M
          0.2110560104933581f,
258
1.57M
          -0.1426608480880734f,
259
1.57M
          -0.1381354035075829f,
260
1.57M
          -0.1743760259965108f,
261
1.57M
          0.1135498731499426f,
262
1.57M
      },
263
1.57M
      {
264
1.57M
          0.2500000000000000,
265
1.57M
          -0.1014005039375377f,
266
1.57M
          -0.1957439937204287f,
267
1.57M
          -0.1621205195722833f,
268
1.57M
          0.0000000000000000,
269
1.57M
          -0.0643507165794628f,
270
1.57M
          0.0074182263792444f,
271
1.57M
          0.2904801297290076f,
272
1.57M
          0.0952002265347505f,
273
1.57M
          0.0000000000000000,
274
1.57M
          0.3675398009862011f,
275
1.57M
          -0.4921585901373891f,
276
1.57M
          0.2462710772207514f,
277
1.57M
          -0.0794670660591026f,
278
1.57M
          0.3623817333531165f,
279
1.57M
          -0.4351904965232251f,
280
1.57M
      },
281
1.57M
      {
282
1.57M
          0.2500000000000000,
283
1.57M
          -0.1014005039375375f,
284
1.57M
          0.0000000000000000,
285
1.57M
          -0.4706702258572528f,
286
1.57M
          0.0000000000000000,
287
1.57M
          -0.0643507165794627f,
288
1.57M
          0.1107416575309343f,
289
1.57M
          0.0000000000000000,
290
1.57M
          -0.1627234014286617f,
291
1.57M
          0.0000000000000000,
292
1.57M
          0.0000000000000000,
293
1.57M
          0.0000000000000000,
294
1.57M
          0.1488339922711357f,
295
1.57M
          0.4972464710953509f,
296
1.57M
          0.2921026642334879f,
297
1.57M
          0.5550443808910661f,
298
1.57M
      },
299
1.57M
      {
300
1.57M
          0.2500000000000000,
301
1.57M
          -0.1014005039375377f,
302
1.57M
          0.1137907446044809f,
303
1.57M
          -0.1464291867126764f,
304
1.57M
          0.0000000000000000,
305
1.57M
          -0.0643507165794628f,
306
1.57M
          0.0829816309488205f,
307
1.57M
          -0.2388977352334460f,
308
1.57M
          -0.3531238544981630f,
309
1.57M
          -0.4082482904638630f,
310
1.57M
          0.4826689115059883f,
311
1.57M
          0.1741941265991622f,
312
1.57M
          -0.0476868035022925f,
313
1.57M
          0.1253805944856366f,
314
1.57M
          -0.4326608024727445f,
315
1.57M
          -0.2546827712406646f,
316
1.57M
      },
317
1.57M
      {
318
1.57M
          0.2500000000000000,
319
1.57M
          -0.1014005039375377f,
320
1.57M
          -0.4444481661973438f,
321
1.57M
          0.3085497062849487f,
322
1.57M
          0.0000000000000000,
323
1.57M
          -0.0643507165794628f,
324
1.57M
          0.1585450355183970f,
325
1.57M
          -0.5112616136592012f,
326
1.57M
          0.2579236279634129f,
327
1.57M
          0.0000000000000000,
328
1.57M
          -0.0812611176717504f,
329
1.57M
          -0.1856718091610990f,
330
1.57M
          -0.3416446842253373f,
331
1.57M
          0.3302282550303805f,
332
1.57M
          0.0702790691196282f,
333
1.57M
          -0.0741750459581023f,
334
1.57M
      },
335
1.57M
      {
336
1.57M
          0.2500000000000000,
337
1.57M
          -0.1014005039375376f,
338
1.57M
          -0.2929100136981264f,
339
1.57M
          0.0000000000000000,
340
1.57M
          0.0000000000000000,
341
1.57M
          -0.0643507165794627f,
342
1.57M
          0.3935103426921022f,
343
1.57M
          0.0657870154914254f,
344
1.57M
          0.0000000000000000,
345
1.57M
          0.4082482904638634f,
346
1.57M
          0.3078822139579031f,
347
1.57M
          0.3852501370925211f,
348
1.57M
          -0.0857401903551927f,
349
1.57M
          -0.4613374887461554f,
350
1.57M
          0.0000000000000000,
351
1.57M
          0.2191868483885728f,
352
1.57M
      },
353
1.57M
      {
354
1.57M
          0.2500000000000000,
355
1.57M
          -0.1014005039375376f,
356
1.57M
          -0.1137907446044814f,
357
1.57M
          -0.1464291867126654f,
358
1.57M
          0.0000000000000000,
359
1.57M
          -0.0643507165794627f,
360
1.57M
          0.0829816309488214f,
361
1.57M
          0.2388977352334547f,
362
1.57M
          -0.3531238544981624f,
363
1.57M
          0.4082482904638630f,
364
1.57M
          -0.4826689115059858f,
365
1.57M
          -0.1741941265991621f,
366
1.57M
          -0.0476868035022928f,
367
1.57M
          0.1253805944856431f,
368
1.57M
          -0.4326608024727457f,
369
1.57M
          -0.2546827712406641f,
370
1.57M
      },
371
1.57M
      {
372
1.57M
          0.2500000000000000,
373
1.57M
          -0.1014005039375374f,
374
1.57M
          0.0000000000000000,
375
1.57M
          0.4251149611657548f,
376
1.57M
          0.0000000000000000,
377
1.57M
          -0.0643507165794626f,
378
1.57M
          -0.4517556589999480f,
379
1.57M
          0.0000000000000000,
380
1.57M
          -0.6035859033230976f,
381
1.57M
          0.0000000000000000,
382
1.57M
          0.0000000000000000,
383
1.57M
          0.0000000000000000,
384
1.57M
          -0.1426608480880724f,
385
1.57M
          -0.1381354035075845f,
386
1.57M
          0.3487520519930227f,
387
1.57M
          0.1135498731499429f,
388
1.57M
      },
389
1.57M
  };
390
391
1.57M
  const HWY_CAPPED(float, 16) d;
392
4.73M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
3.15M
    auto scalar = Zero(d);
394
53.6M
    for (size_t j = 0; j < 16; j++) {
395
50.5M
      auto px = Set(d, pixels[j]);
396
50.5M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
50.5M
      scalar = MulAdd(px, basis, scalar);
398
50.5M
    }
399
3.15M
    Store(scalar, d, coeffs + i);
400
3.15M
  }
401
1.57M
}
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
75.2M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
75.2M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
75.2M
      {
102
75.2M
          0.2500000000000000,
103
75.2M
          0.8769029297991420f,
104
75.2M
          0.0000000000000000,
105
75.2M
          0.0000000000000000,
106
75.2M
          0.0000000000000000,
107
75.2M
          -0.4105377591765233f,
108
75.2M
          0.0000000000000000,
109
75.2M
          0.0000000000000000,
110
75.2M
          0.0000000000000000,
111
75.2M
          0.0000000000000000,
112
75.2M
          0.0000000000000000,
113
75.2M
          0.0000000000000000,
114
75.2M
          0.0000000000000000,
115
75.2M
          0.0000000000000000,
116
75.2M
          0.0000000000000000,
117
75.2M
          0.0000000000000000,
118
75.2M
      },
119
75.2M
      {
120
75.2M
          0.2500000000000000,
121
75.2M
          0.2206518106944235f,
122
75.2M
          0.0000000000000000,
123
75.2M
          0.0000000000000000,
124
75.2M
          -0.7071067811865474f,
125
75.2M
          0.6235485373547691f,
126
75.2M
          0.0000000000000000,
127
75.2M
          0.0000000000000000,
128
75.2M
          0.0000000000000000,
129
75.2M
          0.0000000000000000,
130
75.2M
          0.0000000000000000,
131
75.2M
          0.0000000000000000,
132
75.2M
          0.0000000000000000,
133
75.2M
          0.0000000000000000,
134
75.2M
          0.0000000000000000,
135
75.2M
          0.0000000000000000,
136
75.2M
      },
137
75.2M
      {
138
75.2M
          0.2500000000000000,
139
75.2M
          -0.1014005039375376f,
140
75.2M
          0.4067007583026075f,
141
75.2M
          -0.2125574805828875f,
142
75.2M
          0.0000000000000000,
143
75.2M
          -0.0643507165794627f,
144
75.2M
          -0.4517556589999482f,
145
75.2M
          -0.3046847507248690f,
146
75.2M
          0.3017929516615495f,
147
75.2M
          0.4082482904638627f,
148
75.2M
          0.1747866975480809f,
149
75.2M
          -0.2110560104933578f,
150
75.2M
          -0.1426608480880726f,
151
75.2M
          -0.1381354035075859f,
152
75.2M
          -0.1743760259965107f,
153
75.2M
          0.1135498731499434f,
154
75.2M
      },
155
75.2M
      {
156
75.2M
          0.2500000000000000,
157
75.2M
          -0.1014005039375375f,
158
75.2M
          0.4444481661973445f,
159
75.2M
          0.3085497062849767f,
160
75.2M
          0.0000000000000000f,
161
75.2M
          -0.0643507165794627f,
162
75.2M
          0.1585450355184006f,
163
75.2M
          0.5112616136591823f,
164
75.2M
          0.2579236279634118f,
165
75.2M
          0.0000000000000000,
166
75.2M
          0.0812611176717539f,
167
75.2M
          0.1856718091610980f,
168
75.2M
          -0.3416446842253372f,
169
75.2M
          0.3302282550303788f,
170
75.2M
          0.0702790691196284f,
171
75.2M
          -0.0741750459581035f,
172
75.2M
      },
173
75.2M
      {
174
75.2M
          0.2500000000000000,
175
75.2M
          0.2206518106944236f,
176
75.2M
          0.0000000000000000,
177
75.2M
          0.0000000000000000,
178
75.2M
          0.7071067811865476f,
179
75.2M
          0.6235485373547694f,
180
75.2M
          0.0000000000000000,
181
75.2M
          0.0000000000000000,
182
75.2M
          0.0000000000000000,
183
75.2M
          0.0000000000000000,
184
75.2M
          0.0000000000000000,
185
75.2M
          0.0000000000000000,
186
75.2M
          0.0000000000000000,
187
75.2M
          0.0000000000000000,
188
75.2M
          0.0000000000000000,
189
75.2M
          0.0000000000000000,
190
75.2M
      },
191
75.2M
      {
192
75.2M
          0.2500000000000000,
193
75.2M
          -0.1014005039375378f,
194
75.2M
          0.0000000000000000,
195
75.2M
          0.4706702258572536f,
196
75.2M
          0.0000000000000000,
197
75.2M
          -0.0643507165794628f,
198
75.2M
          -0.0403851516082220f,
199
75.2M
          0.0000000000000000,
200
75.2M
          0.1627234014286620f,
201
75.2M
          0.0000000000000000,
202
75.2M
          0.0000000000000000,
203
75.2M
          0.0000000000000000,
204
75.2M
          0.7367497537172237f,
205
75.2M
          0.0875511500058708f,
206
75.2M
          -0.2921026642334881f,
207
75.2M
          0.1940289303259434f,
208
75.2M
      },
209
75.2M
      {
210
75.2M
          0.2500000000000000,
211
75.2M
          -0.1014005039375377f,
212
75.2M
          0.1957439937204294f,
213
75.2M
          -0.1621205195722993f,
214
75.2M
          0.0000000000000000,
215
75.2M
          -0.0643507165794628f,
216
75.2M
          0.0074182263792424f,
217
75.2M
          -0.2904801297289980f,
218
75.2M
          0.0952002265347504f,
219
75.2M
          0.0000000000000000,
220
75.2M
          -0.3675398009862027f,
221
75.2M
          0.4921585901373873f,
222
75.2M
          0.2462710772207515f,
223
75.2M
          -0.0794670660590957f,
224
75.2M
          0.3623817333531167f,
225
75.2M
          -0.4351904965232280f,
226
75.2M
      },
227
75.2M
      {
228
75.2M
          0.2500000000000000,
229
75.2M
          -0.1014005039375376f,
230
75.2M
          0.2929100136981264f,
231
75.2M
          0.0000000000000000,
232
75.2M
          0.0000000000000000,
233
75.2M
          -0.0643507165794627f,
234
75.2M
          0.3935103426921017f,
235
75.2M
          -0.0657870154914280f,
236
75.2M
          0.0000000000000000,
237
75.2M
          -0.4082482904638628f,
238
75.2M
          -0.3078822139579090f,
239
75.2M
          -0.3852501370925192f,
240
75.2M
          -0.0857401903551931f,
241
75.2M
          -0.4613374887461511f,
242
75.2M
          0.0000000000000000,
243
75.2M
          0.2191868483885747f,
244
75.2M
      },
245
75.2M
      {
246
75.2M
          0.2500000000000000,
247
75.2M
          -0.1014005039375376f,
248
75.2M
          -0.4067007583026072f,
249
75.2M
          -0.2125574805828705f,
250
75.2M
          0.0000000000000000,
251
75.2M
          -0.0643507165794627f,
252
75.2M
          -0.4517556589999464f,
253
75.2M
          0.3046847507248840f,
254
75.2M
          0.3017929516615503f,
255
75.2M
          -0.4082482904638635f,
256
75.2M
          -0.1747866975480813f,
257
75.2M
          0.2110560104933581f,
258
75.2M
          -0.1426608480880734f,
259
75.2M
          -0.1381354035075829f,
260
75.2M
          -0.1743760259965108f,
261
75.2M
          0.1135498731499426f,
262
75.2M
      },
263
75.2M
      {
264
75.2M
          0.2500000000000000,
265
75.2M
          -0.1014005039375377f,
266
75.2M
          -0.1957439937204287f,
267
75.2M
          -0.1621205195722833f,
268
75.2M
          0.0000000000000000,
269
75.2M
          -0.0643507165794628f,
270
75.2M
          0.0074182263792444f,
271
75.2M
          0.2904801297290076f,
272
75.2M
          0.0952002265347505f,
273
75.2M
          0.0000000000000000,
274
75.2M
          0.3675398009862011f,
275
75.2M
          -0.4921585901373891f,
276
75.2M
          0.2462710772207514f,
277
75.2M
          -0.0794670660591026f,
278
75.2M
          0.3623817333531165f,
279
75.2M
          -0.4351904965232251f,
280
75.2M
      },
281
75.2M
      {
282
75.2M
          0.2500000000000000,
283
75.2M
          -0.1014005039375375f,
284
75.2M
          0.0000000000000000,
285
75.2M
          -0.4706702258572528f,
286
75.2M
          0.0000000000000000,
287
75.2M
          -0.0643507165794627f,
288
75.2M
          0.1107416575309343f,
289
75.2M
          0.0000000000000000,
290
75.2M
          -0.1627234014286617f,
291
75.2M
          0.0000000000000000,
292
75.2M
          0.0000000000000000,
293
75.2M
          0.0000000000000000,
294
75.2M
          0.1488339922711357f,
295
75.2M
          0.4972464710953509f,
296
75.2M
          0.2921026642334879f,
297
75.2M
          0.5550443808910661f,
298
75.2M
      },
299
75.2M
      {
300
75.2M
          0.2500000000000000,
301
75.2M
          -0.1014005039375377f,
302
75.2M
          0.1137907446044809f,
303
75.2M
          -0.1464291867126764f,
304
75.2M
          0.0000000000000000,
305
75.2M
          -0.0643507165794628f,
306
75.2M
          0.0829816309488205f,
307
75.2M
          -0.2388977352334460f,
308
75.2M
          -0.3531238544981630f,
309
75.2M
          -0.4082482904638630f,
310
75.2M
          0.4826689115059883f,
311
75.2M
          0.1741941265991622f,
312
75.2M
          -0.0476868035022925f,
313
75.2M
          0.1253805944856366f,
314
75.2M
          -0.4326608024727445f,
315
75.2M
          -0.2546827712406646f,
316
75.2M
      },
317
75.2M
      {
318
75.2M
          0.2500000000000000,
319
75.2M
          -0.1014005039375377f,
320
75.2M
          -0.4444481661973438f,
321
75.2M
          0.3085497062849487f,
322
75.2M
          0.0000000000000000,
323
75.2M
          -0.0643507165794628f,
324
75.2M
          0.1585450355183970f,
325
75.2M
          -0.5112616136592012f,
326
75.2M
          0.2579236279634129f,
327
75.2M
          0.0000000000000000,
328
75.2M
          -0.0812611176717504f,
329
75.2M
          -0.1856718091610990f,
330
75.2M
          -0.3416446842253373f,
331
75.2M
          0.3302282550303805f,
332
75.2M
          0.0702790691196282f,
333
75.2M
          -0.0741750459581023f,
334
75.2M
      },
335
75.2M
      {
336
75.2M
          0.2500000000000000,
337
75.2M
          -0.1014005039375376f,
338
75.2M
          -0.2929100136981264f,
339
75.2M
          0.0000000000000000,
340
75.2M
          0.0000000000000000,
341
75.2M
          -0.0643507165794627f,
342
75.2M
          0.3935103426921022f,
343
75.2M
          0.0657870154914254f,
344
75.2M
          0.0000000000000000,
345
75.2M
          0.4082482904638634f,
346
75.2M
          0.3078822139579031f,
347
75.2M
          0.3852501370925211f,
348
75.2M
          -0.0857401903551927f,
349
75.2M
          -0.4613374887461554f,
350
75.2M
          0.0000000000000000,
351
75.2M
          0.2191868483885728f,
352
75.2M
      },
353
75.2M
      {
354
75.2M
          0.2500000000000000,
355
75.2M
          -0.1014005039375376f,
356
75.2M
          -0.1137907446044814f,
357
75.2M
          -0.1464291867126654f,
358
75.2M
          0.0000000000000000,
359
75.2M
          -0.0643507165794627f,
360
75.2M
          0.0829816309488214f,
361
75.2M
          0.2388977352334547f,
362
75.2M
          -0.3531238544981624f,
363
75.2M
          0.4082482904638630f,
364
75.2M
          -0.4826689115059858f,
365
75.2M
          -0.1741941265991621f,
366
75.2M
          -0.0476868035022928f,
367
75.2M
          0.1253805944856431f,
368
75.2M
          -0.4326608024727457f,
369
75.2M
          -0.2546827712406641f,
370
75.2M
      },
371
75.2M
      {
372
75.2M
          0.2500000000000000,
373
75.2M
          -0.1014005039375374f,
374
75.2M
          0.0000000000000000,
375
75.2M
          0.4251149611657548f,
376
75.2M
          0.0000000000000000,
377
75.2M
          -0.0643507165794626f,
378
75.2M
          -0.4517556589999480f,
379
75.2M
          0.0000000000000000,
380
75.2M
          -0.6035859033230976f,
381
75.2M
          0.0000000000000000,
382
75.2M
          0.0000000000000000,
383
75.2M
          0.0000000000000000,
384
75.2M
          -0.1426608480880724f,
385
75.2M
          -0.1381354035075845f,
386
75.2M
          0.3487520519930227f,
387
75.2M
          0.1135498731499429f,
388
75.2M
      },
389
75.2M
  };
390
391
75.2M
  const HWY_CAPPED(float, 16) d;
392
225M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
150M
    auto scalar = Zero(d);
394
2.55G
    for (size_t j = 0; j < 16; j++) {
395
2.40G
      auto px = Set(d, pixels[j]);
396
2.40G
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
2.40G
      scalar = MulAdd(px, basis, scalar);
398
2.40G
    }
399
150M
    Store(scalar, d, coeffs + i);
400
150M
  }
401
75.2M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
1.57M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
1.57M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
1.57M
      {
102
1.57M
          0.2500000000000000,
103
1.57M
          0.8769029297991420f,
104
1.57M
          0.0000000000000000,
105
1.57M
          0.0000000000000000,
106
1.57M
          0.0000000000000000,
107
1.57M
          -0.4105377591765233f,
108
1.57M
          0.0000000000000000,
109
1.57M
          0.0000000000000000,
110
1.57M
          0.0000000000000000,
111
1.57M
          0.0000000000000000,
112
1.57M
          0.0000000000000000,
113
1.57M
          0.0000000000000000,
114
1.57M
          0.0000000000000000,
115
1.57M
          0.0000000000000000,
116
1.57M
          0.0000000000000000,
117
1.57M
          0.0000000000000000,
118
1.57M
      },
119
1.57M
      {
120
1.57M
          0.2500000000000000,
121
1.57M
          0.2206518106944235f,
122
1.57M
          0.0000000000000000,
123
1.57M
          0.0000000000000000,
124
1.57M
          -0.7071067811865474f,
125
1.57M
          0.6235485373547691f,
126
1.57M
          0.0000000000000000,
127
1.57M
          0.0000000000000000,
128
1.57M
          0.0000000000000000,
129
1.57M
          0.0000000000000000,
130
1.57M
          0.0000000000000000,
131
1.57M
          0.0000000000000000,
132
1.57M
          0.0000000000000000,
133
1.57M
          0.0000000000000000,
134
1.57M
          0.0000000000000000,
135
1.57M
          0.0000000000000000,
136
1.57M
      },
137
1.57M
      {
138
1.57M
          0.2500000000000000,
139
1.57M
          -0.1014005039375376f,
140
1.57M
          0.4067007583026075f,
141
1.57M
          -0.2125574805828875f,
142
1.57M
          0.0000000000000000,
143
1.57M
          -0.0643507165794627f,
144
1.57M
          -0.4517556589999482f,
145
1.57M
          -0.3046847507248690f,
146
1.57M
          0.3017929516615495f,
147
1.57M
          0.4082482904638627f,
148
1.57M
          0.1747866975480809f,
149
1.57M
          -0.2110560104933578f,
150
1.57M
          -0.1426608480880726f,
151
1.57M
          -0.1381354035075859f,
152
1.57M
          -0.1743760259965107f,
153
1.57M
          0.1135498731499434f,
154
1.57M
      },
155
1.57M
      {
156
1.57M
          0.2500000000000000,
157
1.57M
          -0.1014005039375375f,
158
1.57M
          0.4444481661973445f,
159
1.57M
          0.3085497062849767f,
160
1.57M
          0.0000000000000000f,
161
1.57M
          -0.0643507165794627f,
162
1.57M
          0.1585450355184006f,
163
1.57M
          0.5112616136591823f,
164
1.57M
          0.2579236279634118f,
165
1.57M
          0.0000000000000000,
166
1.57M
          0.0812611176717539f,
167
1.57M
          0.1856718091610980f,
168
1.57M
          -0.3416446842253372f,
169
1.57M
          0.3302282550303788f,
170
1.57M
          0.0702790691196284f,
171
1.57M
          -0.0741750459581035f,
172
1.57M
      },
173
1.57M
      {
174
1.57M
          0.2500000000000000,
175
1.57M
          0.2206518106944236f,
176
1.57M
          0.0000000000000000,
177
1.57M
          0.0000000000000000,
178
1.57M
          0.7071067811865476f,
179
1.57M
          0.6235485373547694f,
180
1.57M
          0.0000000000000000,
181
1.57M
          0.0000000000000000,
182
1.57M
          0.0000000000000000,
183
1.57M
          0.0000000000000000,
184
1.57M
          0.0000000000000000,
185
1.57M
          0.0000000000000000,
186
1.57M
          0.0000000000000000,
187
1.57M
          0.0000000000000000,
188
1.57M
          0.0000000000000000,
189
1.57M
          0.0000000000000000,
190
1.57M
      },
191
1.57M
      {
192
1.57M
          0.2500000000000000,
193
1.57M
          -0.1014005039375378f,
194
1.57M
          0.0000000000000000,
195
1.57M
          0.4706702258572536f,
196
1.57M
          0.0000000000000000,
197
1.57M
          -0.0643507165794628f,
198
1.57M
          -0.0403851516082220f,
199
1.57M
          0.0000000000000000,
200
1.57M
          0.1627234014286620f,
201
1.57M
          0.0000000000000000,
202
1.57M
          0.0000000000000000,
203
1.57M
          0.0000000000000000,
204
1.57M
          0.7367497537172237f,
205
1.57M
          0.0875511500058708f,
206
1.57M
          -0.2921026642334881f,
207
1.57M
          0.1940289303259434f,
208
1.57M
      },
209
1.57M
      {
210
1.57M
          0.2500000000000000,
211
1.57M
          -0.1014005039375377f,
212
1.57M
          0.1957439937204294f,
213
1.57M
          -0.1621205195722993f,
214
1.57M
          0.0000000000000000,
215
1.57M
          -0.0643507165794628f,
216
1.57M
          0.0074182263792424f,
217
1.57M
          -0.2904801297289980f,
218
1.57M
          0.0952002265347504f,
219
1.57M
          0.0000000000000000,
220
1.57M
          -0.3675398009862027f,
221
1.57M
          0.4921585901373873f,
222
1.57M
          0.2462710772207515f,
223
1.57M
          -0.0794670660590957f,
224
1.57M
          0.3623817333531167f,
225
1.57M
          -0.4351904965232280f,
226
1.57M
      },
227
1.57M
      {
228
1.57M
          0.2500000000000000,
229
1.57M
          -0.1014005039375376f,
230
1.57M
          0.2929100136981264f,
231
1.57M
          0.0000000000000000,
232
1.57M
          0.0000000000000000,
233
1.57M
          -0.0643507165794627f,
234
1.57M
          0.3935103426921017f,
235
1.57M
          -0.0657870154914280f,
236
1.57M
          0.0000000000000000,
237
1.57M
          -0.4082482904638628f,
238
1.57M
          -0.3078822139579090f,
239
1.57M
          -0.3852501370925192f,
240
1.57M
          -0.0857401903551931f,
241
1.57M
          -0.4613374887461511f,
242
1.57M
          0.0000000000000000,
243
1.57M
          0.2191868483885747f,
244
1.57M
      },
245
1.57M
      {
246
1.57M
          0.2500000000000000,
247
1.57M
          -0.1014005039375376f,
248
1.57M
          -0.4067007583026072f,
249
1.57M
          -0.2125574805828705f,
250
1.57M
          0.0000000000000000,
251
1.57M
          -0.0643507165794627f,
252
1.57M
          -0.4517556589999464f,
253
1.57M
          0.3046847507248840f,
254
1.57M
          0.3017929516615503f,
255
1.57M
          -0.4082482904638635f,
256
1.57M
          -0.1747866975480813f,
257
1.57M
          0.2110560104933581f,
258
1.57M
          -0.1426608480880734f,
259
1.57M
          -0.1381354035075829f,
260
1.57M
          -0.1743760259965108f,
261
1.57M
          0.1135498731499426f,
262
1.57M
      },
263
1.57M
      {
264
1.57M
          0.2500000000000000,
265
1.57M
          -0.1014005039375377f,
266
1.57M
          -0.1957439937204287f,
267
1.57M
          -0.1621205195722833f,
268
1.57M
          0.0000000000000000,
269
1.57M
          -0.0643507165794628f,
270
1.57M
          0.0074182263792444f,
271
1.57M
          0.2904801297290076f,
272
1.57M
          0.0952002265347505f,
273
1.57M
          0.0000000000000000,
274
1.57M
          0.3675398009862011f,
275
1.57M
          -0.4921585901373891f,
276
1.57M
          0.2462710772207514f,
277
1.57M
          -0.0794670660591026f,
278
1.57M
          0.3623817333531165f,
279
1.57M
          -0.4351904965232251f,
280
1.57M
      },
281
1.57M
      {
282
1.57M
          0.2500000000000000,
283
1.57M
          -0.1014005039375375f,
284
1.57M
          0.0000000000000000,
285
1.57M
          -0.4706702258572528f,
286
1.57M
          0.0000000000000000,
287
1.57M
          -0.0643507165794627f,
288
1.57M
          0.1107416575309343f,
289
1.57M
          0.0000000000000000,
290
1.57M
          -0.1627234014286617f,
291
1.57M
          0.0000000000000000,
292
1.57M
          0.0000000000000000,
293
1.57M
          0.0000000000000000,
294
1.57M
          0.1488339922711357f,
295
1.57M
          0.4972464710953509f,
296
1.57M
          0.2921026642334879f,
297
1.57M
          0.5550443808910661f,
298
1.57M
      },
299
1.57M
      {
300
1.57M
          0.2500000000000000,
301
1.57M
          -0.1014005039375377f,
302
1.57M
          0.1137907446044809f,
303
1.57M
          -0.1464291867126764f,
304
1.57M
          0.0000000000000000,
305
1.57M
          -0.0643507165794628f,
306
1.57M
          0.0829816309488205f,
307
1.57M
          -0.2388977352334460f,
308
1.57M
          -0.3531238544981630f,
309
1.57M
          -0.4082482904638630f,
310
1.57M
          0.4826689115059883f,
311
1.57M
          0.1741941265991622f,
312
1.57M
          -0.0476868035022925f,
313
1.57M
          0.1253805944856366f,
314
1.57M
          -0.4326608024727445f,
315
1.57M
          -0.2546827712406646f,
316
1.57M
      },
317
1.57M
      {
318
1.57M
          0.2500000000000000,
319
1.57M
          -0.1014005039375377f,
320
1.57M
          -0.4444481661973438f,
321
1.57M
          0.3085497062849487f,
322
1.57M
          0.0000000000000000,
323
1.57M
          -0.0643507165794628f,
324
1.57M
          0.1585450355183970f,
325
1.57M
          -0.5112616136592012f,
326
1.57M
          0.2579236279634129f,
327
1.57M
          0.0000000000000000,
328
1.57M
          -0.0812611176717504f,
329
1.57M
          -0.1856718091610990f,
330
1.57M
          -0.3416446842253373f,
331
1.57M
          0.3302282550303805f,
332
1.57M
          0.0702790691196282f,
333
1.57M
          -0.0741750459581023f,
334
1.57M
      },
335
1.57M
      {
336
1.57M
          0.2500000000000000,
337
1.57M
          -0.1014005039375376f,
338
1.57M
          -0.2929100136981264f,
339
1.57M
          0.0000000000000000,
340
1.57M
          0.0000000000000000,
341
1.57M
          -0.0643507165794627f,
342
1.57M
          0.3935103426921022f,
343
1.57M
          0.0657870154914254f,
344
1.57M
          0.0000000000000000,
345
1.57M
          0.4082482904638634f,
346
1.57M
          0.3078822139579031f,
347
1.57M
          0.3852501370925211f,
348
1.57M
          -0.0857401903551927f,
349
1.57M
          -0.4613374887461554f,
350
1.57M
          0.0000000000000000,
351
1.57M
          0.2191868483885728f,
352
1.57M
      },
353
1.57M
      {
354
1.57M
          0.2500000000000000,
355
1.57M
          -0.1014005039375376f,
356
1.57M
          -0.1137907446044814f,
357
1.57M
          -0.1464291867126654f,
358
1.57M
          0.0000000000000000,
359
1.57M
          -0.0643507165794627f,
360
1.57M
          0.0829816309488214f,
361
1.57M
          0.2388977352334547f,
362
1.57M
          -0.3531238544981624f,
363
1.57M
          0.4082482904638630f,
364
1.57M
          -0.4826689115059858f,
365
1.57M
          -0.1741941265991621f,
366
1.57M
          -0.0476868035022928f,
367
1.57M
          0.1253805944856431f,
368
1.57M
          -0.4326608024727457f,
369
1.57M
          -0.2546827712406641f,
370
1.57M
      },
371
1.57M
      {
372
1.57M
          0.2500000000000000,
373
1.57M
          -0.1014005039375374f,
374
1.57M
          0.0000000000000000,
375
1.57M
          0.4251149611657548f,
376
1.57M
          0.0000000000000000,
377
1.57M
          -0.0643507165794626f,
378
1.57M
          -0.4517556589999480f,
379
1.57M
          0.0000000000000000,
380
1.57M
          -0.6035859033230976f,
381
1.57M
          0.0000000000000000,
382
1.57M
          0.0000000000000000,
383
1.57M
          0.0000000000000000,
384
1.57M
          -0.1426608480880724f,
385
1.57M
          -0.1381354035075845f,
386
1.57M
          0.3487520519930227f,
387
1.57M
          0.1135498731499429f,
388
1.57M
      },
389
1.57M
  };
390
391
1.57M
  const HWY_CAPPED(float, 16) d;
392
4.73M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
3.15M
    auto scalar = Zero(d);
394
53.6M
    for (size_t j = 0; j < 16; j++) {
395
50.5M
      auto px = Set(d, pixels[j]);
396
50.5M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
50.5M
      scalar = MulAdd(px, basis, scalar);
398
50.5M
    }
399
3.15M
    Store(scalar, d, coeffs + i);
400
3.15M
  }
401
1.57M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
402
403
// Coefficient layout:
404
//  - (even, even) positions hold AFV coefficients
405
//  - (odd, even) positions hold DCT4x4 coefficients
406
//  - (any, odd) positions hold DCT4x8 coefficients
407
template <size_t afv_kind>
408
void AFVTransformFromPixels(const float* JXL_RESTRICT pixels,
409
                            size_t pixels_stride,
410
78.4M
                            float* JXL_RESTRICT coefficients) {
411
78.4M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
78.4M
  size_t afv_x = afv_kind & 1;
413
78.4M
  size_t afv_y = afv_kind / 2;
414
78.4M
  HWY_ALIGN float block[4 * 8] = {};
415
392M
  for (size_t iy = 0; iy < 4; iy++) {
416
1.56G
    for (size_t ix = 0; ix < 4; ix++) {
417
1.25G
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.25G
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.25G
    }
420
313M
  }
421
  // AFV coefficients in (even, even) positions.
422
78.4M
  HWY_ALIGN float coeff[4 * 4];
423
78.4M
  AFVDCT4x4(block, coeff);
424
392M
  for (size_t iy = 0; iy < 4; iy++) {
425
1.56G
    for (size_t ix = 0; ix < 4; ix++) {
426
1.25G
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.25G
    }
428
313M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
78.4M
  ComputeScaledDCT<4, 4>()(
431
78.4M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
78.4M
              pixels_stride),
433
78.4M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
392M
  for (size_t iy = 0; iy < 4; iy++) {
436
2.82G
    for (size_t ix = 0; ix < 8; ix++) {
437
2.50G
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
2.50G
    }
439
313M
  }
440
  // 4x8 DCT of the other half of the block.
441
78.4M
  ComputeScaledDCT<4, 8>()(
442
78.4M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
78.4M
      block, scratch_space);
444
392M
  for (size_t iy = 0; iy < 4; iy++) {
445
2.82G
    for (size_t ix = 0; ix < 8; ix++) {
446
2.50G
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
2.50G
    }
448
313M
  }
449
78.4M
  float block00 = coefficients[0] * 0.25f;
450
78.4M
  float block01 = coefficients[1];
451
78.4M
  float block10 = coefficients[8];
452
78.4M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
78.4M
  coefficients[1] = (block00 - block01) * 0.5f;
454
78.4M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
78.4M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
311k
                            float* JXL_RESTRICT coefficients) {
411
311k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
311k
  size_t afv_x = afv_kind & 1;
413
311k
  size_t afv_y = afv_kind / 2;
414
311k
  HWY_ALIGN float block[4 * 8] = {};
415
1.55M
  for (size_t iy = 0; iy < 4; iy++) {
416
6.23M
    for (size_t ix = 0; ix < 4; ix++) {
417
4.99M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
4.99M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
4.99M
    }
420
1.24M
  }
421
  // AFV coefficients in (even, even) positions.
422
311k
  HWY_ALIGN float coeff[4 * 4];
423
311k
  AFVDCT4x4(block, coeff);
424
1.55M
  for (size_t iy = 0; iy < 4; iy++) {
425
6.23M
    for (size_t ix = 0; ix < 4; ix++) {
426
4.99M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
4.99M
    }
428
1.24M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
311k
  ComputeScaledDCT<4, 4>()(
431
311k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
311k
              pixels_stride),
433
311k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.55M
  for (size_t iy = 0; iy < 4; iy++) {
436
11.2M
    for (size_t ix = 0; ix < 8; ix++) {
437
9.98M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
9.98M
    }
439
1.24M
  }
440
  // 4x8 DCT of the other half of the block.
441
311k
  ComputeScaledDCT<4, 8>()(
442
311k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
311k
      block, scratch_space);
444
1.55M
  for (size_t iy = 0; iy < 4; iy++) {
445
11.2M
    for (size_t ix = 0; ix < 8; ix++) {
446
9.98M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
9.98M
    }
448
1.24M
  }
449
311k
  float block00 = coefficients[0] * 0.25f;
450
311k
  float block01 = coefficients[1];
451
311k
  float block10 = coefficients[8];
452
311k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
311k
  coefficients[1] = (block00 - block01) * 0.5f;
454
311k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
311k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
601k
                            float* JXL_RESTRICT coefficients) {
411
601k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
601k
  size_t afv_x = afv_kind & 1;
413
601k
  size_t afv_y = afv_kind / 2;
414
601k
  HWY_ALIGN float block[4 * 8] = {};
415
3.00M
  for (size_t iy = 0; iy < 4; iy++) {
416
12.0M
    for (size_t ix = 0; ix < 4; ix++) {
417
9.62M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
9.62M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
9.62M
    }
420
2.40M
  }
421
  // AFV coefficients in (even, even) positions.
422
601k
  HWY_ALIGN float coeff[4 * 4];
423
601k
  AFVDCT4x4(block, coeff);
424
3.00M
  for (size_t iy = 0; iy < 4; iy++) {
425
12.0M
    for (size_t ix = 0; ix < 4; ix++) {
426
9.62M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
9.62M
    }
428
2.40M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
601k
  ComputeScaledDCT<4, 4>()(
431
601k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
601k
              pixels_stride),
433
601k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
3.00M
  for (size_t iy = 0; iy < 4; iy++) {
436
21.6M
    for (size_t ix = 0; ix < 8; ix++) {
437
19.2M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
19.2M
    }
439
2.40M
  }
440
  // 4x8 DCT of the other half of the block.
441
601k
  ComputeScaledDCT<4, 8>()(
442
601k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
601k
      block, scratch_space);
444
3.00M
  for (size_t iy = 0; iy < 4; iy++) {
445
21.6M
    for (size_t ix = 0; ix < 8; ix++) {
446
19.2M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
19.2M
    }
448
2.40M
  }
449
601k
  float block00 = coefficients[0] * 0.25f;
450
601k
  float block01 = coefficients[1];
451
601k
  float block10 = coefficients[8];
452
601k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
601k
  coefficients[1] = (block00 - block01) * 0.5f;
454
601k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
601k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
273k
                            float* JXL_RESTRICT coefficients) {
411
273k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
273k
  size_t afv_x = afv_kind & 1;
413
273k
  size_t afv_y = afv_kind / 2;
414
273k
  HWY_ALIGN float block[4 * 8] = {};
415
1.36M
  for (size_t iy = 0; iy < 4; iy++) {
416
5.47M
    for (size_t ix = 0; ix < 4; ix++) {
417
4.37M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
4.37M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
4.37M
    }
420
1.09M
  }
421
  // AFV coefficients in (even, even) positions.
422
273k
  HWY_ALIGN float coeff[4 * 4];
423
273k
  AFVDCT4x4(block, coeff);
424
1.36M
  for (size_t iy = 0; iy < 4; iy++) {
425
5.47M
    for (size_t ix = 0; ix < 4; ix++) {
426
4.37M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
4.37M
    }
428
1.09M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
273k
  ComputeScaledDCT<4, 4>()(
431
273k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
273k
              pixels_stride),
433
273k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.36M
  for (size_t iy = 0; iy < 4; iy++) {
436
9.85M
    for (size_t ix = 0; ix < 8; ix++) {
437
8.75M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
8.75M
    }
439
1.09M
  }
440
  // 4x8 DCT of the other half of the block.
441
273k
  ComputeScaledDCT<4, 8>()(
442
273k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
273k
      block, scratch_space);
444
1.36M
  for (size_t iy = 0; iy < 4; iy++) {
445
9.85M
    for (size_t ix = 0; ix < 8; ix++) {
446
8.75M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
8.75M
    }
448
1.09M
  }
449
273k
  float block00 = coefficients[0] * 0.25f;
450
273k
  float block01 = coefficients[1];
451
273k
  float block10 = coefficients[8];
452
273k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
273k
  coefficients[1] = (block00 - block01) * 0.5f;
454
273k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
273k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
391k
                            float* JXL_RESTRICT coefficients) {
411
391k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
391k
  size_t afv_x = afv_kind & 1;
413
391k
  size_t afv_y = afv_kind / 2;
414
391k
  HWY_ALIGN float block[4 * 8] = {};
415
1.95M
  for (size_t iy = 0; iy < 4; iy++) {
416
7.82M
    for (size_t ix = 0; ix < 4; ix++) {
417
6.26M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
6.26M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
6.26M
    }
420
1.56M
  }
421
  // AFV coefficients in (even, even) positions.
422
391k
  HWY_ALIGN float coeff[4 * 4];
423
391k
  AFVDCT4x4(block, coeff);
424
1.95M
  for (size_t iy = 0; iy < 4; iy++) {
425
7.82M
    for (size_t ix = 0; ix < 4; ix++) {
426
6.26M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
6.26M
    }
428
1.56M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
391k
  ComputeScaledDCT<4, 4>()(
431
391k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
391k
              pixels_stride),
433
391k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.95M
  for (size_t iy = 0; iy < 4; iy++) {
436
14.0M
    for (size_t ix = 0; ix < 8; ix++) {
437
12.5M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
12.5M
    }
439
1.56M
  }
440
  // 4x8 DCT of the other half of the block.
441
391k
  ComputeScaledDCT<4, 8>()(
442
391k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
391k
      block, scratch_space);
444
1.95M
  for (size_t iy = 0; iy < 4; iy++) {
445
14.0M
    for (size_t ix = 0; ix < 8; ix++) {
446
12.5M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
12.5M
    }
448
1.56M
  }
449
391k
  float block00 = coefficients[0] * 0.25f;
450
391k
  float block01 = coefficients[1];
451
391k
  float block10 = coefficients[8];
452
391k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
391k
  coefficients[1] = (block00 - block01) * 0.5f;
454
391k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
391k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
18.8M
                            float* JXL_RESTRICT coefficients) {
411
18.8M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
18.8M
  size_t afv_x = afv_kind & 1;
413
18.8M
  size_t afv_y = afv_kind / 2;
414
18.8M
  HWY_ALIGN float block[4 * 8] = {};
415
94.0M
  for (size_t iy = 0; iy < 4; iy++) {
416
376M
    for (size_t ix = 0; ix < 4; ix++) {
417
301M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
301M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
301M
    }
420
75.2M
  }
421
  // AFV coefficients in (even, even) positions.
422
18.8M
  HWY_ALIGN float coeff[4 * 4];
423
18.8M
  AFVDCT4x4(block, coeff);
424
94.0M
  for (size_t iy = 0; iy < 4; iy++) {
425
376M
    for (size_t ix = 0; ix < 4; ix++) {
426
301M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
301M
    }
428
75.2M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
18.8M
  ComputeScaledDCT<4, 4>()(
431
18.8M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
18.8M
              pixels_stride),
433
18.8M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
94.0M
  for (size_t iy = 0; iy < 4; iy++) {
436
677M
    for (size_t ix = 0; ix < 8; ix++) {
437
602M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
602M
    }
439
75.2M
  }
440
  // 4x8 DCT of the other half of the block.
441
18.8M
  ComputeScaledDCT<4, 8>()(
442
18.8M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
18.8M
      block, scratch_space);
444
94.0M
  for (size_t iy = 0; iy < 4; iy++) {
445
677M
    for (size_t ix = 0; ix < 8; ix++) {
446
602M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
602M
    }
448
75.2M
  }
449
18.8M
  float block00 = coefficients[0] * 0.25f;
450
18.8M
  float block01 = coefficients[1];
451
18.8M
  float block10 = coefficients[8];
452
18.8M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
18.8M
  coefficients[1] = (block00 - block01) * 0.5f;
454
18.8M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
18.8M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
18.8M
                            float* JXL_RESTRICT coefficients) {
411
18.8M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
18.8M
  size_t afv_x = afv_kind & 1;
413
18.8M
  size_t afv_y = afv_kind / 2;
414
18.8M
  HWY_ALIGN float block[4 * 8] = {};
415
94.0M
  for (size_t iy = 0; iy < 4; iy++) {
416
376M
    for (size_t ix = 0; ix < 4; ix++) {
417
301M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
301M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
301M
    }
420
75.2M
  }
421
  // AFV coefficients in (even, even) positions.
422
18.8M
  HWY_ALIGN float coeff[4 * 4];
423
18.8M
  AFVDCT4x4(block, coeff);
424
94.0M
  for (size_t iy = 0; iy < 4; iy++) {
425
376M
    for (size_t ix = 0; ix < 4; ix++) {
426
301M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
301M
    }
428
75.2M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
18.8M
  ComputeScaledDCT<4, 4>()(
431
18.8M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
18.8M
              pixels_stride),
433
18.8M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
94.0M
  for (size_t iy = 0; iy < 4; iy++) {
436
677M
    for (size_t ix = 0; ix < 8; ix++) {
437
602M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
602M
    }
439
75.2M
  }
440
  // 4x8 DCT of the other half of the block.
441
18.8M
  ComputeScaledDCT<4, 8>()(
442
18.8M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
18.8M
      block, scratch_space);
444
94.0M
  for (size_t iy = 0; iy < 4; iy++) {
445
677M
    for (size_t ix = 0; ix < 8; ix++) {
446
602M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
602M
    }
448
75.2M
  }
449
18.8M
  float block00 = coefficients[0] * 0.25f;
450
18.8M
  float block01 = coefficients[1];
451
18.8M
  float block10 = coefficients[8];
452
18.8M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
18.8M
  coefficients[1] = (block00 - block01) * 0.5f;
454
18.8M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
18.8M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
18.8M
                            float* JXL_RESTRICT coefficients) {
411
18.8M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
18.8M
  size_t afv_x = afv_kind & 1;
413
18.8M
  size_t afv_y = afv_kind / 2;
414
18.8M
  HWY_ALIGN float block[4 * 8] = {};
415
94.0M
  for (size_t iy = 0; iy < 4; iy++) {
416
376M
    for (size_t ix = 0; ix < 4; ix++) {
417
301M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
301M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
301M
    }
420
75.2M
  }
421
  // AFV coefficients in (even, even) positions.
422
18.8M
  HWY_ALIGN float coeff[4 * 4];
423
18.8M
  AFVDCT4x4(block, coeff);
424
94.0M
  for (size_t iy = 0; iy < 4; iy++) {
425
376M
    for (size_t ix = 0; ix < 4; ix++) {
426
301M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
301M
    }
428
75.2M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
18.8M
  ComputeScaledDCT<4, 4>()(
431
18.8M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
18.8M
              pixels_stride),
433
18.8M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
94.0M
  for (size_t iy = 0; iy < 4; iy++) {
436
677M
    for (size_t ix = 0; ix < 8; ix++) {
437
602M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
602M
    }
439
75.2M
  }
440
  // 4x8 DCT of the other half of the block.
441
18.8M
  ComputeScaledDCT<4, 8>()(
442
18.8M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
18.8M
      block, scratch_space);
444
94.0M
  for (size_t iy = 0; iy < 4; iy++) {
445
677M
    for (size_t ix = 0; ix < 8; ix++) {
446
602M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
602M
    }
448
75.2M
  }
449
18.8M
  float block00 = coefficients[0] * 0.25f;
450
18.8M
  float block01 = coefficients[1];
451
18.8M
  float block10 = coefficients[8];
452
18.8M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
18.8M
  coefficients[1] = (block00 - block01) * 0.5f;
454
18.8M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
18.8M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
18.8M
                            float* JXL_RESTRICT coefficients) {
411
18.8M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
18.8M
  size_t afv_x = afv_kind & 1;
413
18.8M
  size_t afv_y = afv_kind / 2;
414
18.8M
  HWY_ALIGN float block[4 * 8] = {};
415
94.0M
  for (size_t iy = 0; iy < 4; iy++) {
416
376M
    for (size_t ix = 0; ix < 4; ix++) {
417
301M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
301M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
301M
    }
420
75.2M
  }
421
  // AFV coefficients in (even, even) positions.
422
18.8M
  HWY_ALIGN float coeff[4 * 4];
423
18.8M
  AFVDCT4x4(block, coeff);
424
94.0M
  for (size_t iy = 0; iy < 4; iy++) {
425
376M
    for (size_t ix = 0; ix < 4; ix++) {
426
301M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
301M
    }
428
75.2M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
18.8M
  ComputeScaledDCT<4, 4>()(
431
18.8M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
18.8M
              pixels_stride),
433
18.8M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
94.0M
  for (size_t iy = 0; iy < 4; iy++) {
436
677M
    for (size_t ix = 0; ix < 8; ix++) {
437
602M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
602M
    }
439
75.2M
  }
440
  // 4x8 DCT of the other half of the block.
441
18.8M
  ComputeScaledDCT<4, 8>()(
442
18.8M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
18.8M
      block, scratch_space);
444
94.0M
  for (size_t iy = 0; iy < 4; iy++) {
445
677M
    for (size_t ix = 0; ix < 8; ix++) {
446
602M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
602M
    }
448
75.2M
  }
449
18.8M
  float block00 = coefficients[0] * 0.25f;
450
18.8M
  float block01 = coefficients[1];
451
18.8M
  float block10 = coefficients[8];
452
18.8M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
18.8M
  coefficients[1] = (block00 - block01) * 0.5f;
454
18.8M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
18.8M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
311k
                            float* JXL_RESTRICT coefficients) {
411
311k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
311k
  size_t afv_x = afv_kind & 1;
413
311k
  size_t afv_y = afv_kind / 2;
414
311k
  HWY_ALIGN float block[4 * 8] = {};
415
1.55M
  for (size_t iy = 0; iy < 4; iy++) {
416
6.23M
    for (size_t ix = 0; ix < 4; ix++) {
417
4.99M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
4.99M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
4.99M
    }
420
1.24M
  }
421
  // AFV coefficients in (even, even) positions.
422
311k
  HWY_ALIGN float coeff[4 * 4];
423
311k
  AFVDCT4x4(block, coeff);
424
1.55M
  for (size_t iy = 0; iy < 4; iy++) {
425
6.23M
    for (size_t ix = 0; ix < 4; ix++) {
426
4.99M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
4.99M
    }
428
1.24M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
311k
  ComputeScaledDCT<4, 4>()(
431
311k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
311k
              pixels_stride),
433
311k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.55M
  for (size_t iy = 0; iy < 4; iy++) {
436
11.2M
    for (size_t ix = 0; ix < 8; ix++) {
437
9.98M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
9.98M
    }
439
1.24M
  }
440
  // 4x8 DCT of the other half of the block.
441
311k
  ComputeScaledDCT<4, 8>()(
442
311k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
311k
      block, scratch_space);
444
1.55M
  for (size_t iy = 0; iy < 4; iy++) {
445
11.2M
    for (size_t ix = 0; ix < 8; ix++) {
446
9.98M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
9.98M
    }
448
1.24M
  }
449
311k
  float block00 = coefficients[0] * 0.25f;
450
311k
  float block01 = coefficients[1];
451
311k
  float block10 = coefficients[8];
452
311k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
311k
  coefficients[1] = (block00 - block01) * 0.5f;
454
311k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
311k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
601k
                            float* JXL_RESTRICT coefficients) {
411
601k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
601k
  size_t afv_x = afv_kind & 1;
413
601k
  size_t afv_y = afv_kind / 2;
414
601k
  HWY_ALIGN float block[4 * 8] = {};
415
3.00M
  for (size_t iy = 0; iy < 4; iy++) {
416
12.0M
    for (size_t ix = 0; ix < 4; ix++) {
417
9.62M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
9.62M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
9.62M
    }
420
2.40M
  }
421
  // AFV coefficients in (even, even) positions.
422
601k
  HWY_ALIGN float coeff[4 * 4];
423
601k
  AFVDCT4x4(block, coeff);
424
3.00M
  for (size_t iy = 0; iy < 4; iy++) {
425
12.0M
    for (size_t ix = 0; ix < 4; ix++) {
426
9.62M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
9.62M
    }
428
2.40M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
601k
  ComputeScaledDCT<4, 4>()(
431
601k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
601k
              pixels_stride),
433
601k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
3.00M
  for (size_t iy = 0; iy < 4; iy++) {
436
21.6M
    for (size_t ix = 0; ix < 8; ix++) {
437
19.2M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
19.2M
    }
439
2.40M
  }
440
  // 4x8 DCT of the other half of the block.
441
601k
  ComputeScaledDCT<4, 8>()(
442
601k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
601k
      block, scratch_space);
444
3.00M
  for (size_t iy = 0; iy < 4; iy++) {
445
21.6M
    for (size_t ix = 0; ix < 8; ix++) {
446
19.2M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
19.2M
    }
448
2.40M
  }
449
601k
  float block00 = coefficients[0] * 0.25f;
450
601k
  float block01 = coefficients[1];
451
601k
  float block10 = coefficients[8];
452
601k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
601k
  coefficients[1] = (block00 - block01) * 0.5f;
454
601k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
601k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
273k
                            float* JXL_RESTRICT coefficients) {
411
273k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
273k
  size_t afv_x = afv_kind & 1;
413
273k
  size_t afv_y = afv_kind / 2;
414
273k
  HWY_ALIGN float block[4 * 8] = {};
415
1.36M
  for (size_t iy = 0; iy < 4; iy++) {
416
5.47M
    for (size_t ix = 0; ix < 4; ix++) {
417
4.37M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
4.37M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
4.37M
    }
420
1.09M
  }
421
  // AFV coefficients in (even, even) positions.
422
273k
  HWY_ALIGN float coeff[4 * 4];
423
273k
  AFVDCT4x4(block, coeff);
424
1.36M
  for (size_t iy = 0; iy < 4; iy++) {
425
5.47M
    for (size_t ix = 0; ix < 4; ix++) {
426
4.37M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
4.37M
    }
428
1.09M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
273k
  ComputeScaledDCT<4, 4>()(
431
273k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
273k
              pixels_stride),
433
273k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.36M
  for (size_t iy = 0; iy < 4; iy++) {
436
9.85M
    for (size_t ix = 0; ix < 8; ix++) {
437
8.75M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
8.75M
    }
439
1.09M
  }
440
  // 4x8 DCT of the other half of the block.
441
273k
  ComputeScaledDCT<4, 8>()(
442
273k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
273k
      block, scratch_space);
444
1.36M
  for (size_t iy = 0; iy < 4; iy++) {
445
9.85M
    for (size_t ix = 0; ix < 8; ix++) {
446
8.75M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
8.75M
    }
448
1.09M
  }
449
273k
  float block00 = coefficients[0] * 0.25f;
450
273k
  float block01 = coefficients[1];
451
273k
  float block10 = coefficients[8];
452
273k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
273k
  coefficients[1] = (block00 - block01) * 0.5f;
454
273k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
273k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
391k
                            float* JXL_RESTRICT coefficients) {
411
391k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
391k
  size_t afv_x = afv_kind & 1;
413
391k
  size_t afv_y = afv_kind / 2;
414
391k
  HWY_ALIGN float block[4 * 8] = {};
415
1.95M
  for (size_t iy = 0; iy < 4; iy++) {
416
7.82M
    for (size_t ix = 0; ix < 4; ix++) {
417
6.26M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
6.26M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
6.26M
    }
420
1.56M
  }
421
  // AFV coefficients in (even, even) positions.
422
391k
  HWY_ALIGN float coeff[4 * 4];
423
391k
  AFVDCT4x4(block, coeff);
424
1.95M
  for (size_t iy = 0; iy < 4; iy++) {
425
7.82M
    for (size_t ix = 0; ix < 4; ix++) {
426
6.26M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
6.26M
    }
428
1.56M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
391k
  ComputeScaledDCT<4, 4>()(
431
391k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
391k
              pixels_stride),
433
391k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.95M
  for (size_t iy = 0; iy < 4; iy++) {
436
14.0M
    for (size_t ix = 0; ix < 8; ix++) {
437
12.5M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
12.5M
    }
439
1.56M
  }
440
  // 4x8 DCT of the other half of the block.
441
391k
  ComputeScaledDCT<4, 8>()(
442
391k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
391k
      block, scratch_space);
444
1.95M
  for (size_t iy = 0; iy < 4; iy++) {
445
14.0M
    for (size_t ix = 0; ix < 8; ix++) {
446
12.5M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
12.5M
    }
448
1.56M
  }
449
391k
  float block00 = coefficients[0] * 0.25f;
450
391k
  float block01 = coefficients[1];
451
391k
  float block10 = coefficients[8];
452
391k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
391k
  coefficients[1] = (block00 - block01) * 0.5f;
454
391k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
391k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
456
457
HWY_MAYBE_UNUSED void TransformFromPixels(const AcStrategyType strategy,
458
                                          const float* JXL_RESTRICT pixels,
459
                                          size_t pixels_stride,
460
                                          float* JXL_RESTRICT coefficients,
461
273M
                                          float* JXL_RESTRICT scratch_space) {
462
273M
  using Type = AcStrategyType;
463
273M
  switch (strategy) {
464
21.1M
    case Type::IDENTITY: {
465
63.3M
      for (size_t y = 0; y < 2; y++) {
466
126M
        for (size_t x = 0; x < 2; x++) {
467
84.4M
          float block_dc = 0;
468
422M
          for (size_t iy = 0; iy < 4; iy++) {
469
1.68G
            for (size_t ix = 0; ix < 4; ix++) {
470
1.35G
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
1.35G
            }
472
337M
          }
473
84.4M
          block_dc *= 1.0f / 16;
474
422M
          for (size_t iy = 0; iy < 4; iy++) {
475
1.68G
            for (size_t ix = 0; ix < 4; ix++) {
476
1.35G
              if (ix == 1 && iy == 1) continue;
477
1.26G
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
1.26G
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
1.26G
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
1.26G
            }
481
337M
          }
482
84.4M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
84.4M
          coefficients[y * 8 + x] = block_dc;
484
84.4M
        }
485
42.2M
      }
486
21.1M
      float block00 = coefficients[0];
487
21.1M
      float block01 = coefficients[1];
488
21.1M
      float block10 = coefficients[8];
489
21.1M
      float block11 = coefficients[9];
490
21.1M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
21.1M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
21.1M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
21.1M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
21.1M
      break;
495
0
    }
496
19.8M
    case Type::DCT8X4: {
497
59.5M
      for (size_t x = 0; x < 2; x++) {
498
39.6M
        HWY_ALIGN float block[4 * 8];
499
39.6M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
39.6M
                                 scratch_space);
501
198M
        for (size_t iy = 0; iy < 4; iy++) {
502
1.42G
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
1.26G
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
1.26G
          }
506
158M
        }
507
39.6M
      }
508
19.8M
      float block0 = coefficients[0];
509
19.8M
      float block1 = coefficients[8];
510
19.8M
      coefficients[0] = (block0 + block1) * 0.5f;
511
19.8M
      coefficients[8] = (block0 - block1) * 0.5f;
512
19.8M
      break;
513
0
    }
514
19.5M
    case Type::DCT4X8: {
515
58.5M
      for (size_t y = 0; y < 2; y++) {
516
39.0M
        HWY_ALIGN float block[4 * 8];
517
39.0M
        ComputeScaledDCT<4, 8>()(
518
39.0M
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
39.0M
            scratch_space);
520
195M
        for (size_t iy = 0; iy < 4; iy++) {
521
1.40G
          for (size_t ix = 0; ix < 8; ix++) {
522
1.24G
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
1.24G
          }
524
156M
        }
525
39.0M
      }
526
19.5M
      float block0 = coefficients[0];
527
19.5M
      float block1 = coefficients[8];
528
19.5M
      coefficients[0] = (block0 + block1) * 0.5f;
529
19.5M
      coefficients[8] = (block0 - block1) * 0.5f;
530
19.5M
      break;
531
0
    }
532
18.8M
    case Type::DCT4X4: {
533
56.4M
      for (size_t y = 0; y < 2; y++) {
534
112M
        for (size_t x = 0; x < 2; x++) {
535
75.2M
          HWY_ALIGN float block[4 * 4];
536
75.2M
          ComputeScaledDCT<4, 4>()(
537
75.2M
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
75.2M
              block, scratch_space);
539
376M
          for (size_t iy = 0; iy < 4; iy++) {
540
1.50G
            for (size_t ix = 0; ix < 4; ix++) {
541
1.20G
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
1.20G
            }
543
301M
          }
544
75.2M
        }
545
37.6M
      }
546
18.8M
      float block00 = coefficients[0];
547
18.8M
      float block01 = coefficients[1];
548
18.8M
      float block10 = coefficients[8];
549
18.8M
      float block11 = coefficients[9];
550
18.8M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
18.8M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
18.8M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
18.8M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
18.8M
      break;
555
0
    }
556
21.1M
    case Type::DCT2X2: {
557
21.1M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
21.1M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
21.1M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
21.1M
      break;
561
0
    }
562
8.18M
    case Type::DCT16X16: {
563
8.18M
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
8.18M
                                 scratch_space);
565
8.18M
      break;
566
0
    }
567
15.8M
    case Type::DCT16X8: {
568
15.8M
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
15.8M
                                scratch_space);
570
15.8M
      break;
571
0
    }
572
16.2M
    case Type::DCT8X16: {
573
16.2M
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
16.2M
                                scratch_space);
575
16.2M
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
3.17M
    case Type::DCT32X16: {
588
3.17M
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
3.17M
                                 scratch_space);
590
3.17M
      break;
591
0
    }
592
3.31M
    case Type::DCT16X32: {
593
3.31M
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
3.31M
                                 scratch_space);
595
3.31M
      break;
596
0
    }
597
1.96M
    case Type::DCT32X32: {
598
1.96M
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
1.96M
                                 scratch_space);
600
1.96M
      break;
601
0
    }
602
44.0M
    case Type::DCT: {
603
44.0M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
44.0M
                               scratch_space);
605
44.0M
      break;
606
0
    }
607
19.4M
    case Type::AFV0: {
608
19.4M
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
19.4M
      break;
610
0
    }
611
20.0M
    case Type::AFV1: {
612
20.0M
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
20.0M
      break;
614
0
    }
615
19.3M
    case Type::AFV2: {
616
19.3M
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
19.3M
      break;
618
0
    }
619
19.6M
    case Type::AFV3: {
620
19.6M
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
19.6M
      break;
622
0
    }
623
332k
    case Type::DCT64X64: {
624
332k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
332k
                                 scratch_space);
626
332k
      break;
627
0
    }
628
981k
    case Type::DCT64X32: {
629
981k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
981k
                                 scratch_space);
631
981k
      break;
632
0
    }
633
683k
    case Type::DCT32X64: {
634
683k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
683k
                                 scratch_space);
636
683k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
273M
  }
669
273M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
9.46M
                                          float* JXL_RESTRICT scratch_space) {
462
9.46M
  using Type = AcStrategyType;
463
9.46M
  switch (strategy) {
464
1.14M
    case Type::IDENTITY: {
465
3.43M
      for (size_t y = 0; y < 2; y++) {
466
6.87M
        for (size_t x = 0; x < 2; x++) {
467
4.58M
          float block_dc = 0;
468
22.9M
          for (size_t iy = 0; iy < 4; iy++) {
469
91.6M
            for (size_t ix = 0; ix < 4; ix++) {
470
73.3M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
73.3M
            }
472
18.3M
          }
473
4.58M
          block_dc *= 1.0f / 16;
474
22.9M
          for (size_t iy = 0; iy < 4; iy++) {
475
91.6M
            for (size_t ix = 0; ix < 4; ix++) {
476
73.3M
              if (ix == 1 && iy == 1) continue;
477
68.7M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
68.7M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
68.7M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
68.7M
            }
481
18.3M
          }
482
4.58M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
4.58M
          coefficients[y * 8 + x] = block_dc;
484
4.58M
        }
485
2.29M
      }
486
1.14M
      float block00 = coefficients[0];
487
1.14M
      float block01 = coefficients[1];
488
1.14M
      float block10 = coefficients[8];
489
1.14M
      float block11 = coefficients[9];
490
1.14M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
1.14M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
1.14M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
1.14M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
1.14M
      break;
495
0
    }
496
511k
    case Type::DCT8X4: {
497
1.53M
      for (size_t x = 0; x < 2; x++) {
498
1.02M
        HWY_ALIGN float block[4 * 8];
499
1.02M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
1.02M
                                 scratch_space);
501
5.11M
        for (size_t iy = 0; iy < 4; iy++) {
502
36.7M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
32.7M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
32.7M
          }
506
4.08M
        }
507
1.02M
      }
508
511k
      float block0 = coefficients[0];
509
511k
      float block1 = coefficients[8];
510
511k
      coefficients[0] = (block0 + block1) * 0.5f;
511
511k
      coefficients[8] = (block0 - block1) * 0.5f;
512
511k
      break;
513
0
    }
514
344k
    case Type::DCT4X8: {
515
1.03M
      for (size_t y = 0; y < 2; y++) {
516
689k
        HWY_ALIGN float block[4 * 8];
517
689k
        ComputeScaledDCT<4, 8>()(
518
689k
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
689k
            scratch_space);
520
3.44M
        for (size_t iy = 0; iy < 4; iy++) {
521
24.8M
          for (size_t ix = 0; ix < 8; ix++) {
522
22.0M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
22.0M
          }
524
2.75M
        }
525
689k
      }
526
344k
      float block0 = coefficients[0];
527
344k
      float block1 = coefficients[8];
528
344k
      coefficients[0] = (block0 + block1) * 0.5f;
529
344k
      coefficients[8] = (block0 - block1) * 0.5f;
530
344k
      break;
531
0
    }
532
2.40k
    case Type::DCT4X4: {
533
7.20k
      for (size_t y = 0; y < 2; y++) {
534
14.4k
        for (size_t x = 0; x < 2; x++) {
535
9.60k
          HWY_ALIGN float block[4 * 4];
536
9.60k
          ComputeScaledDCT<4, 4>()(
537
9.60k
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
9.60k
              block, scratch_space);
539
48.0k
          for (size_t iy = 0; iy < 4; iy++) {
540
192k
            for (size_t ix = 0; ix < 4; ix++) {
541
153k
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
153k
            }
543
38.4k
          }
544
9.60k
        }
545
4.80k
      }
546
2.40k
      float block00 = coefficients[0];
547
2.40k
      float block01 = coefficients[1];
548
2.40k
      float block10 = coefficients[8];
549
2.40k
      float block11 = coefficients[9];
550
2.40k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
2.40k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
2.40k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
2.40k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
2.40k
      break;
555
0
    }
556
1.14M
    case Type::DCT2X2: {
557
1.14M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
1.14M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
1.14M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
1.14M
      break;
561
0
    }
562
195k
    case Type::DCT16X16: {
563
195k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
195k
                                 scratch_space);
565
195k
      break;
566
0
    }
567
328k
    case Type::DCT16X8: {
568
328k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
328k
                                scratch_space);
570
328k
      break;
571
0
    }
572
473k
    case Type::DCT8X16: {
573
473k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
473k
                                scratch_space);
575
473k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
92.9k
    case Type::DCT32X16: {
588
92.9k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
92.9k
                                 scratch_space);
590
92.9k
      break;
591
0
    }
592
149k
    case Type::DCT16X32: {
593
149k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
149k
                                 scratch_space);
595
149k
      break;
596
0
    }
597
215k
    case Type::DCT32X32: {
598
215k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
215k
                                 scratch_space);
600
215k
      break;
601
0
    }
602
3.22M
    case Type::DCT: {
603
3.22M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
3.22M
                               scratch_space);
605
3.22M
      break;
606
0
    }
607
311k
    case Type::AFV0: {
608
311k
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
311k
      break;
610
0
    }
611
601k
    case Type::AFV1: {
612
601k
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
601k
      break;
614
0
    }
615
273k
    case Type::AFV2: {
616
273k
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
273k
      break;
618
0
    }
619
391k
    case Type::AFV3: {
620
391k
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
391k
      break;
622
0
    }
623
39.5k
    case Type::DCT64X64: {
624
39.5k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
39.5k
                                 scratch_space);
626
39.5k
      break;
627
0
    }
628
6.54k
    case Type::DCT64X32: {
629
6.54k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
6.54k
                                 scratch_space);
631
6.54k
      break;
632
0
    }
633
11.1k
    case Type::DCT32X64: {
634
11.1k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
11.1k
                                 scratch_space);
636
11.1k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
9.46M
  }
669
9.46M
}
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
235M
                                          float* JXL_RESTRICT scratch_space) {
462
235M
  using Type = AcStrategyType;
463
235M
  switch (strategy) {
464
18.8M
    case Type::IDENTITY: {
465
56.4M
      for (size_t y = 0; y < 2; y++) {
466
112M
        for (size_t x = 0; x < 2; x++) {
467
75.2M
          float block_dc = 0;
468
376M
          for (size_t iy = 0; iy < 4; iy++) {
469
1.50G
            for (size_t ix = 0; ix < 4; ix++) {
470
1.20G
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
1.20G
            }
472
301M
          }
473
75.2M
          block_dc *= 1.0f / 16;
474
376M
          for (size_t iy = 0; iy < 4; iy++) {
475
1.50G
            for (size_t ix = 0; ix < 4; ix++) {
476
1.20G
              if (ix == 1 && iy == 1) continue;
477
1.12G
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
1.12G
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
1.12G
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
1.12G
            }
481
301M
          }
482
75.2M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
75.2M
          coefficients[y * 8 + x] = block_dc;
484
75.2M
        }
485
37.6M
      }
486
18.8M
      float block00 = coefficients[0];
487
18.8M
      float block01 = coefficients[1];
488
18.8M
      float block10 = coefficients[8];
489
18.8M
      float block11 = coefficients[9];
490
18.8M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
18.8M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
18.8M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
18.8M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
18.8M
      break;
495
0
    }
496
18.8M
    case Type::DCT8X4: {
497
56.4M
      for (size_t x = 0; x < 2; x++) {
498
37.6M
        HWY_ALIGN float block[4 * 8];
499
37.6M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
37.6M
                                 scratch_space);
501
188M
        for (size_t iy = 0; iy < 4; iy++) {
502
1.35G
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
1.20G
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
1.20G
          }
506
150M
        }
507
37.6M
      }
508
18.8M
      float block0 = coefficients[0];
509
18.8M
      float block1 = coefficients[8];
510
18.8M
      coefficients[0] = (block0 + block1) * 0.5f;
511
18.8M
      coefficients[8] = (block0 - block1) * 0.5f;
512
18.8M
      break;
513
0
    }
514
18.8M
    case Type::DCT4X8: {
515
56.4M
      for (size_t y = 0; y < 2; y++) {
516
37.6M
        HWY_ALIGN float block[4 * 8];
517
37.6M
        ComputeScaledDCT<4, 8>()(
518
37.6M
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
37.6M
            scratch_space);
520
188M
        for (size_t iy = 0; iy < 4; iy++) {
521
1.35G
          for (size_t ix = 0; ix < 8; ix++) {
522
1.20G
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
1.20G
          }
524
150M
        }
525
37.6M
      }
526
18.8M
      float block0 = coefficients[0];
527
18.8M
      float block1 = coefficients[8];
528
18.8M
      coefficients[0] = (block0 + block1) * 0.5f;
529
18.8M
      coefficients[8] = (block0 - block1) * 0.5f;
530
18.8M
      break;
531
0
    }
532
18.8M
    case Type::DCT4X4: {
533
56.4M
      for (size_t y = 0; y < 2; y++) {
534
112M
        for (size_t x = 0; x < 2; x++) {
535
75.2M
          HWY_ALIGN float block[4 * 4];
536
75.2M
          ComputeScaledDCT<4, 4>()(
537
75.2M
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
75.2M
              block, scratch_space);
539
376M
          for (size_t iy = 0; iy < 4; iy++) {
540
1.50G
            for (size_t ix = 0; ix < 4; ix++) {
541
1.20G
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
1.20G
            }
543
301M
          }
544
75.2M
        }
545
37.6M
      }
546
18.8M
      float block00 = coefficients[0];
547
18.8M
      float block01 = coefficients[1];
548
18.8M
      float block10 = coefficients[8];
549
18.8M
      float block11 = coefficients[9];
550
18.8M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
18.8M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
18.8M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
18.8M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
18.8M
      break;
555
0
    }
556
18.8M
    case Type::DCT2X2: {
557
18.8M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
18.8M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
18.8M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
18.8M
      break;
561
0
    }
562
7.79M
    case Type::DCT16X16: {
563
7.79M
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
7.79M
                                 scratch_space);
565
7.79M
      break;
566
0
    }
567
15.2M
    case Type::DCT16X8: {
568
15.2M
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
15.2M
                                scratch_space);
570
15.2M
      break;
571
0
    }
572
15.3M
    case Type::DCT8X16: {
573
15.3M
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
15.3M
                                scratch_space);
575
15.3M
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
2.99M
    case Type::DCT32X16: {
588
2.99M
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
2.99M
                                 scratch_space);
590
2.99M
      break;
591
0
    }
592
3.01M
    case Type::DCT16X32: {
593
3.01M
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
3.01M
                                 scratch_space);
595
3.01M
      break;
596
0
    }
597
1.53M
    case Type::DCT32X32: {
598
1.53M
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
1.53M
                                 scratch_space);
600
1.53M
      break;
601
0
    }
602
18.8M
    case Type::DCT: {
603
18.8M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
18.8M
                               scratch_space);
605
18.8M
      break;
606
0
    }
607
18.8M
    case Type::AFV0: {
608
18.8M
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
18.8M
      break;
610
0
    }
611
18.8M
    case Type::AFV1: {
612
18.8M
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
18.8M
      break;
614
0
    }
615
18.8M
    case Type::AFV2: {
616
18.8M
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
18.8M
      break;
618
0
    }
619
18.8M
    case Type::AFV3: {
620
18.8M
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
18.8M
      break;
622
0
    }
623
253k
    case Type::DCT64X64: {
624
253k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
253k
                                 scratch_space);
626
253k
      break;
627
0
    }
628
968k
    case Type::DCT64X32: {
629
968k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
968k
                                 scratch_space);
631
968k
      break;
632
0
    }
633
660k
    case Type::DCT32X64: {
634
660k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
660k
                                 scratch_space);
636
660k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
235M
  }
669
235M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
28.2M
                                          float* JXL_RESTRICT scratch_space) {
462
28.2M
  using Type = AcStrategyType;
463
28.2M
  switch (strategy) {
464
1.14M
    case Type::IDENTITY: {
465
3.43M
      for (size_t y = 0; y < 2; y++) {
466
6.87M
        for (size_t x = 0; x < 2; x++) {
467
4.58M
          float block_dc = 0;
468
22.9M
          for (size_t iy = 0; iy < 4; iy++) {
469
91.6M
            for (size_t ix = 0; ix < 4; ix++) {
470
73.3M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
73.3M
            }
472
18.3M
          }
473
4.58M
          block_dc *= 1.0f / 16;
474
22.9M
          for (size_t iy = 0; iy < 4; iy++) {
475
91.6M
            for (size_t ix = 0; ix < 4; ix++) {
476
73.3M
              if (ix == 1 && iy == 1) continue;
477
68.7M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
68.7M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
68.7M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
68.7M
            }
481
18.3M
          }
482
4.58M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
4.58M
          coefficients[y * 8 + x] = block_dc;
484
4.58M
        }
485
2.29M
      }
486
1.14M
      float block00 = coefficients[0];
487
1.14M
      float block01 = coefficients[1];
488
1.14M
      float block10 = coefficients[8];
489
1.14M
      float block11 = coefficients[9];
490
1.14M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
1.14M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
1.14M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
1.14M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
1.14M
      break;
495
0
    }
496
511k
    case Type::DCT8X4: {
497
1.53M
      for (size_t x = 0; x < 2; x++) {
498
1.02M
        HWY_ALIGN float block[4 * 8];
499
1.02M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
1.02M
                                 scratch_space);
501
5.11M
        for (size_t iy = 0; iy < 4; iy++) {
502
36.7M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
32.7M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
32.7M
          }
506
4.08M
        }
507
1.02M
      }
508
511k
      float block0 = coefficients[0];
509
511k
      float block1 = coefficients[8];
510
511k
      coefficients[0] = (block0 + block1) * 0.5f;
511
511k
      coefficients[8] = (block0 - block1) * 0.5f;
512
511k
      break;
513
0
    }
514
344k
    case Type::DCT4X8: {
515
1.03M
      for (size_t y = 0; y < 2; y++) {
516
689k
        HWY_ALIGN float block[4 * 8];
517
689k
        ComputeScaledDCT<4, 8>()(
518
689k
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
689k
            scratch_space);
520
3.44M
        for (size_t iy = 0; iy < 4; iy++) {
521
24.8M
          for (size_t ix = 0; ix < 8; ix++) {
522
22.0M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
22.0M
          }
524
2.75M
        }
525
689k
      }
526
344k
      float block0 = coefficients[0];
527
344k
      float block1 = coefficients[8];
528
344k
      coefficients[0] = (block0 + block1) * 0.5f;
529
344k
      coefficients[8] = (block0 - block1) * 0.5f;
530
344k
      break;
531
0
    }
532
2.40k
    case Type::DCT4X4: {
533
7.20k
      for (size_t y = 0; y < 2; y++) {
534
14.4k
        for (size_t x = 0; x < 2; x++) {
535
9.60k
          HWY_ALIGN float block[4 * 4];
536
9.60k
          ComputeScaledDCT<4, 4>()(
537
9.60k
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
9.60k
              block, scratch_space);
539
48.0k
          for (size_t iy = 0; iy < 4; iy++) {
540
192k
            for (size_t ix = 0; ix < 4; ix++) {
541
153k
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
153k
            }
543
38.4k
          }
544
9.60k
        }
545
4.80k
      }
546
2.40k
      float block00 = coefficients[0];
547
2.40k
      float block01 = coefficients[1];
548
2.40k
      float block10 = coefficients[8];
549
2.40k
      float block11 = coefficients[9];
550
2.40k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
2.40k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
2.40k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
2.40k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
2.40k
      break;
555
0
    }
556
1.14M
    case Type::DCT2X2: {
557
1.14M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
1.14M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
1.14M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
1.14M
      break;
561
0
    }
562
195k
    case Type::DCT16X16: {
563
195k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
195k
                                 scratch_space);
565
195k
      break;
566
0
    }
567
328k
    case Type::DCT16X8: {
568
328k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
328k
                                scratch_space);
570
328k
      break;
571
0
    }
572
473k
    case Type::DCT8X16: {
573
473k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
473k
                                scratch_space);
575
473k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
92.9k
    case Type::DCT32X16: {
588
92.9k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
92.9k
                                 scratch_space);
590
92.9k
      break;
591
0
    }
592
149k
    case Type::DCT16X32: {
593
149k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
149k
                                 scratch_space);
595
149k
      break;
596
0
    }
597
215k
    case Type::DCT32X32: {
598
215k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
215k
                                 scratch_space);
600
215k
      break;
601
0
    }
602
22.0M
    case Type::DCT: {
603
22.0M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
22.0M
                               scratch_space);
605
22.0M
      break;
606
0
    }
607
311k
    case Type::AFV0: {
608
311k
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
311k
      break;
610
0
    }
611
601k
    case Type::AFV1: {
612
601k
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
601k
      break;
614
0
    }
615
273k
    case Type::AFV2: {
616
273k
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
273k
      break;
618
0
    }
619
391k
    case Type::AFV3: {
620
391k
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
391k
      break;
622
0
    }
623
39.5k
    case Type::DCT64X64: {
624
39.5k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
39.5k
                                 scratch_space);
626
39.5k
      break;
627
0
    }
628
6.54k
    case Type::DCT64X32: {
629
6.54k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
6.54k
                                 scratch_space);
631
6.54k
      break;
632
0
    }
633
11.1k
    case Type::DCT32X64: {
634
11.1k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
11.1k
                                 scratch_space);
636
11.1k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
28.2M
  }
669
28.2M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
670
671
// `scratch_space` should be at least 4 * kMaxBlocks * kMaxBlocks elements.
672
HWY_MAYBE_UNUSED void DCFromLowestFrequencies(const AcStrategyType strategy,
673
                                              const float* block, float* dc,
674
                                              size_t dc_stride,
675
37.7M
                                              float* scratch_space) {
676
37.7M
  using Type = AcStrategyType;
677
37.7M
  switch (strategy) {
678
656k
    case Type::DCT16X8: {
679
656k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
656k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
656k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
656k
      break;
683
0
    }
684
946k
    case Type::DCT8X16: {
685
946k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
946k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
946k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
946k
      break;
689
0
    }
690
390k
    case Type::DCT16X16: {
691
390k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
390k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
390k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
390k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
185k
    case Type::DCT32X16: {
709
185k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
185k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
185k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
185k
      break;
713
0
    }
714
298k
    case Type::DCT16X32: {
715
298k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
298k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
298k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
298k
      break;
719
0
    }
720
431k
    case Type::DCT32X32: {
721
431k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
431k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
431k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
431k
      break;
725
0
    }
726
13.0k
    case Type::DCT64X32: {
727
13.0k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
13.0k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
13.0k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
13.0k
      break;
731
0
    }
732
22.3k
    case Type::DCT32X64: {
733
22.3k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
22.3k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
22.3k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
22.3k
      break;
737
0
    }
738
79.0k
    case Type::DCT64X64: {
739
79.0k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
79.0k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
79.0k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
79.0k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
25.2M
    case Type::DCT:
787
27.5M
    case Type::DCT2X2:
788
27.5M
    case Type::DCT4X4:
789
28.2M
    case Type::DCT4X8:
790
29.2M
    case Type::DCT8X4:
791
29.8M
    case Type::AFV0:
792
31.0M
    case Type::AFV1:
793
31.6M
    case Type::AFV2:
794
32.4M
    case Type::AFV3:
795
34.7M
    case Type::IDENTITY:
796
34.7M
      dc[0] = block[0];
797
34.7M
      break;
798
37.7M
  }
799
37.7M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Line
Count
Source
675
9.46M
                                              float* scratch_space) {
676
9.46M
  using Type = AcStrategyType;
677
9.46M
  switch (strategy) {
678
328k
    case Type::DCT16X8: {
679
328k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
328k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
328k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
328k
      break;
683
0
    }
684
473k
    case Type::DCT8X16: {
685
473k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
473k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
473k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
473k
      break;
689
0
    }
690
195k
    case Type::DCT16X16: {
691
195k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
195k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
195k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
195k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
92.9k
    case Type::DCT32X16: {
709
92.9k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
92.9k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
92.9k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
92.9k
      break;
713
0
    }
714
149k
    case Type::DCT16X32: {
715
149k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
149k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
149k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
149k
      break;
719
0
    }
720
215k
    case Type::DCT32X32: {
721
215k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
215k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
215k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
215k
      break;
725
0
    }
726
6.54k
    case Type::DCT64X32: {
727
6.54k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
6.54k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
6.54k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
6.54k
      break;
731
0
    }
732
11.1k
    case Type::DCT32X64: {
733
11.1k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
11.1k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
11.1k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
11.1k
      break;
737
0
    }
738
39.5k
    case Type::DCT64X64: {
739
39.5k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
39.5k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
39.5k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
39.5k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
3.22M
    case Type::DCT:
787
4.36M
    case Type::DCT2X2:
788
4.36M
    case Type::DCT4X4:
789
4.71M
    case Type::DCT4X8:
790
5.22M
    case Type::DCT8X4:
791
5.53M
    case Type::AFV0:
792
6.13M
    case Type::AFV1:
793
6.41M
    case Type::AFV2:
794
6.80M
    case Type::AFV3:
795
7.94M
    case Type::IDENTITY:
796
7.94M
      dc[0] = block[0];
797
7.94M
      break;
798
9.46M
  }
799
9.46M
}
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Line
Count
Source
675
28.2M
                                              float* scratch_space) {
676
28.2M
  using Type = AcStrategyType;
677
28.2M
  switch (strategy) {
678
328k
    case Type::DCT16X8: {
679
328k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
328k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
328k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
328k
      break;
683
0
    }
684
473k
    case Type::DCT8X16: {
685
473k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
473k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
473k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
473k
      break;
689
0
    }
690
195k
    case Type::DCT16X16: {
691
195k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
195k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
195k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
195k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
92.9k
    case Type::DCT32X16: {
709
92.9k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
92.9k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
92.9k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
92.9k
      break;
713
0
    }
714
149k
    case Type::DCT16X32: {
715
149k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
149k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
149k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
149k
      break;
719
0
    }
720
215k
    case Type::DCT32X32: {
721
215k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
215k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
215k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
215k
      break;
725
0
    }
726
6.54k
    case Type::DCT64X32: {
727
6.54k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
6.54k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
6.54k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
6.54k
      break;
731
0
    }
732
11.1k
    case Type::DCT32X64: {
733
11.1k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
11.1k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
11.1k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
11.1k
      break;
737
0
    }
738
39.5k
    case Type::DCT64X64: {
739
39.5k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
39.5k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
39.5k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
39.5k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
22.0M
    case Type::DCT:
787
23.1M
    case Type::DCT2X2:
788
23.1M
    case Type::DCT4X4:
789
23.5M
    case Type::DCT4X8:
790
24.0M
    case Type::DCT8X4:
791
24.3M
    case Type::AFV0:
792
24.9M
    case Type::AFV1:
793
25.2M
    case Type::AFV2:
794
25.6M
    case Type::AFV3:
795
26.7M
    case Type::IDENTITY:
796
26.7M
      dc[0] = block[0];
797
26.7M
      break;
798
28.2M
  }
799
28.2M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
800
801
}  // namespace
802
// NOLINTNEXTLINE(google-readability-namespace-comments)
803
}  // namespace HWY_NAMESPACE
804
}  // namespace jxl
805
HWY_AFTER_NAMESPACE();
806
807
#endif  // LIB_JXL_ENC_TRANSFORMS_INL_H_