Coverage Report

Created: 2026-06-14 06:57

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/dec_transforms-inl.h
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include <cstring>
7
8
#include "lib/jxl/base/compiler_specific.h"
9
#include "lib/jxl/frame_dimensions.h"
10
11
#if defined(LIB_JXL_DEC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
12
#ifdef LIB_JXL_DEC_TRANSFORMS_INL_H_
13
#undef LIB_JXL_DEC_TRANSFORMS_INL_H_
14
#else
15
#define LIB_JXL_DEC_TRANSFORMS_INL_H_
16
#endif
17
18
#include <cstddef>
19
#include <hwy/highway.h>
20
21
#include "lib/jxl/ac_strategy.h"
22
#include "lib/jxl/dct-inl.h"
23
#include "lib/jxl/dct_scales.h"
24
HWY_BEFORE_NAMESPACE();
25
namespace jxl {
26
namespace HWY_NAMESPACE {
27
namespace {
28
29
// These templates are not found via ADL.
30
using hwy::HWY_NAMESPACE::MulAdd;
31
32
// Computes the lowest-frequency LF_ROWSxLF_COLS-sized square in output, which
33
// is a DCT_ROWS*DCT_COLS-sized DCT block, by doing a ROWS*COLS DCT on the
34
// input block.
//
// Steps, as written below:
//   1. ComputeScaledDCT<ROWS, COLS> is invoked on `input` (row pitch
//      `input_stride`) with `block` and `scratch_space` as its working
//      buffers; `block` is what gets read back afterwards.
//   2. Every coefficient read from `block` is multiplied by one
//      DCTTotalResampleScale factor per axis and stored into `output`
//      (row pitch `output_stride`).
//
// The static_asserts require LF_ROWS == ROWS and LF_COLS == COLS, so the LF_*
// template parameters carry no extra information for any instantiation that
// compiles.
35
template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
36
          size_t ROWS, size_t COLS>
37
JXL_INLINE void ReinterpretingDCT(const float* input, const size_t input_stride,
38
                                  float* output, const size_t output_stride,
39
                                  float* JXL_RESTRICT block,
40
73.5k
                                  float* JXL_RESTRICT scratch_space) {
41
73.5k
  static_assert(LF_ROWS == ROWS,
42
73.5k
                "ReinterpretingDCT should only be called with LF == N");
43
73.5k
  static_assert(LF_COLS == COLS,
44
73.5k
                "ReinterpretingDCT should only be called with LF == N");
45
73.5k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
73.5k
                                 scratch_space);
  // The condition depends only on template parameters, so exactly one of the
  // two branches is ever taken within a given instantiation.
47
73.5k
  if (ROWS < COLS) {
    // `block` is read as ROWS rows of COLS values (row pitch COLS).
48
135k
    for (size_t y = 0; y < LF_ROWS; y++) {
49
211k
      for (size_t x = 0; x < LF_COLS; x++) {
50
143k
        output[y * output_stride + x] =
51
143k
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
143k
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
143k
      }
54
67.8k
    }
55
67.3k
  } else {
    // ROWS >= COLS: the loop bounds, the `block` row pitch and the scale
    // factors all swap the roles of ROWS and COLS relative to the branch
    // above, i.e. `block` is read as COLS rows of ROWS values.
    // NOTE(review): presumably ComputeScaledDCT leaves its result transposed
    // in this case -- confirm against dct-inl.h.
56
21.5k
    for (size_t y = 0; y < LF_COLS; y++) {
57
88.4k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
73.1k
        output[y * output_stride + x] =
59
73.1k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
73.1k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
73.1k
      }
62
15.3k
    }
63
6.21k
  }
64
73.5k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
402
                                  float* JXL_RESTRICT scratch_space) {
41
402
  static_assert(LF_ROWS == ROWS,
42
402
                "ReinterpretingDCT should only be called with LF == N");
43
402
  static_assert(LF_COLS == COLS,
44
402
                "ReinterpretingDCT should only be called with LF == N");
45
402
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
402
                                 scratch_space);
47
402
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
402
  } else {
56
804
    for (size_t y = 0; y < LF_COLS; y++) {
57
1.20k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
804
        output[y * output_stride + x] =
59
804
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
804
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
804
      }
62
402
    }
63
402
  }
64
402
}
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
66.8k
                                  float* JXL_RESTRICT scratch_space) {
41
66.8k
  static_assert(LF_ROWS == ROWS,
42
66.8k
                "ReinterpretingDCT should only be called with LF == N");
43
66.8k
  static_assert(LF_COLS == COLS,
44
66.8k
                "ReinterpretingDCT should only be called with LF == N");
45
66.8k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
66.8k
                                 scratch_space);
47
67.1k
  if (ROWS < COLS) {
48
134k
    for (size_t y = 0; y < LF_ROWS; y++) {
49
201k
      for (size_t x = 0; x < LF_COLS; x++) {
50
134k
        output[y * output_stride + x] =
51
134k
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
134k
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
134k
      }
54
67.1k
    }
55
18.4E
  } else {
56
18.4E
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
18.4E
  }
64
66.8k
}
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
4.66k
                                  float* JXL_RESTRICT scratch_space) {
41
4.66k
  static_assert(LF_ROWS == ROWS,
42
4.66k
                "ReinterpretingDCT should only be called with LF == N");
43
4.66k
  static_assert(LF_COLS == COLS,
44
4.66k
                "ReinterpretingDCT should only be called with LF == N");
45
4.66k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
4.66k
                                 scratch_space);
47
4.66k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
4.66k
  } else {
56
14.0k
    for (size_t y = 0; y < LF_COLS; y++) {
57
28.0k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
18.6k
        output[y * output_stride + x] =
59
18.6k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
18.6k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
18.6k
      }
62
9.34k
    }
63
4.66k
  }
64
4.66k
}
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
588
                                  float* JXL_RESTRICT scratch_space) {
41
588
  static_assert(LF_ROWS == ROWS,
42
588
                "ReinterpretingDCT should only be called with LF == N");
43
588
  static_assert(LF_COLS == COLS,
44
588
                "ReinterpretingDCT should only be called with LF == N");
45
588
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
588
                                 scratch_space);
47
588
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
588
  } else {
56
1.17k
    for (size_t y = 0; y < LF_COLS; y++) {
57
2.94k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
2.35k
        output[y * output_stride + x] =
59
2.35k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
2.35k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
2.35k
      }
62
588
    }
63
588
  }
64
588
}
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
219
                                  float* JXL_RESTRICT scratch_space) {
41
219
  static_assert(LF_ROWS == ROWS,
42
219
                "ReinterpretingDCT should only be called with LF == N");
43
219
  static_assert(LF_COLS == COLS,
44
219
                "ReinterpretingDCT should only be called with LF == N");
45
219
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
219
                                 scratch_space);
47
219
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
219
  } else {
56
657
    for (size_t y = 0; y < LF_COLS; y++) {
57
2.19k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
1.75k
        output[y * output_stride + x] =
59
1.75k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
1.75k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
1.75k
      }
62
438
    }
63
219
  }
64
219
}
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
132
                                  float* JXL_RESTRICT scratch_space) {
41
132
  static_assert(LF_ROWS == ROWS,
42
132
                "ReinterpretingDCT should only be called with LF == N");
43
132
  static_assert(LF_COLS == COLS,
44
132
                "ReinterpretingDCT should only be called with LF == N");
45
132
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
132
                                 scratch_space);
47
132
  if (ROWS < COLS) {
48
396
    for (size_t y = 0; y < LF_ROWS; y++) {
49
1.32k
      for (size_t x = 0; x < LF_COLS; x++) {
50
1.05k
        output[y * output_stride + x] =
51
1.05k
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
1.05k
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
1.05k
      }
54
264
    }
55
132
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
132
}
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
120
                                  float* JXL_RESTRICT scratch_space) {
41
120
  static_assert(LF_ROWS == ROWS,
42
120
                "ReinterpretingDCT should only be called with LF == N");
43
120
  static_assert(LF_COLS == COLS,
44
120
                "ReinterpretingDCT should only be called with LF == N");
45
120
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
120
                                 scratch_space);
47
120
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
120
  } else {
56
600
    for (size_t y = 0; y < LF_COLS; y++) {
57
2.40k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
1.92k
        output[y * output_stride + x] =
59
1.92k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
1.92k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
1.92k
      }
62
480
    }
63
120
  }
64
120
}
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
63
                                  float* JXL_RESTRICT scratch_space) {
41
63
  static_assert(LF_ROWS == ROWS,
42
63
                "ReinterpretingDCT should only be called with LF == N");
43
63
  static_assert(LF_COLS == COLS,
44
63
                "ReinterpretingDCT should only be called with LF == N");
45
63
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
63
                                 scratch_space);
47
63
  if (ROWS < COLS) {
48
315
    for (size_t y = 0; y < LF_ROWS; y++) {
49
2.26k
      for (size_t x = 0; x < LF_COLS; x++) {
50
2.01k
        output[y * output_stride + x] =
51
2.01k
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
2.01k
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
2.01k
      }
54
252
    }
55
63
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
63
}
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
354
                                  float* JXL_RESTRICT scratch_space) {
41
354
  static_assert(LF_ROWS == ROWS,
42
354
                "ReinterpretingDCT should only be called with LF == N");
43
354
  static_assert(LF_COLS == COLS,
44
354
                "ReinterpretingDCT should only be called with LF == N");
45
354
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
354
                                 scratch_space);
47
354
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
354
  } else {
56
3.18k
    for (size_t y = 0; y < LF_COLS; y++) {
57
25.4k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
22.6k
        output[y * output_stride + x] =
59
22.6k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
22.6k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
22.6k
      }
62
2.83k
    }
63
354
  }
64
354
}
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
123
                                  float* JXL_RESTRICT scratch_space) {
41
123
  static_assert(LF_ROWS == ROWS,
42
123
                "ReinterpretingDCT should only be called with LF == N");
43
123
  static_assert(LF_COLS == COLS,
44
123
                "ReinterpretingDCT should only be called with LF == N");
45
123
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
123
                                 scratch_space);
47
123
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
123
  } else {
56
1.10k
    for (size_t y = 0; y < LF_COLS; y++) {
57
16.7k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
15.7k
        output[y * output_stride + x] =
59
15.7k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
15.7k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
15.7k
      }
62
984
    }
63
123
  }
64
123
}
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
12
                                  float* JXL_RESTRICT scratch_space) {
41
12
  static_assert(LF_ROWS == ROWS,
42
12
                "ReinterpretingDCT should only be called with LF == N");
43
12
  static_assert(LF_COLS == COLS,
44
12
                "ReinterpretingDCT should only be called with LF == N");
45
12
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
12
                                 scratch_space);
47
12
  if (ROWS < COLS) {
48
204
    for (size_t y = 0; y < LF_ROWS; y++) {
49
6.33k
      for (size_t x = 0; x < LF_COLS; x++) {
50
6.14k
        output[y * output_stride + x] =
51
6.14k
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
6.14k
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
6.14k
      }
54
192
    }
55
12
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
12
}
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
9
                                  float* JXL_RESTRICT scratch_space) {
41
9
  static_assert(LF_ROWS == ROWS,
42
9
                "ReinterpretingDCT should only be called with LF == N");
43
9
  static_assert(LF_COLS == COLS,
44
9
                "ReinterpretingDCT should only be called with LF == N");
45
9
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
9
                                 scratch_space);
47
9
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
9
  } else {
56
297
    for (size_t y = 0; y < LF_COLS; y++) {
57
9.50k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
9.21k
        output[y * output_stride + x] =
59
9.21k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
9.21k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
9.21k
      }
62
288
    }
63
9
  }
64
9
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
65
66
// Rewrites the top-left SxS corner of `block` (row pitch kBlockDim) into the
// top-left SxS corner of `out` (row pitch `stride_out`).
//
// The SxS corner is treated as four (S/2)x(S/2) quadrants. For every position
// (y, x) inside a quadrant, the four values at that position (one per
// quadrant) are combined with the four +/- sign patterns below, and the four
// results are stored as the 2x2 cell whose top-left corner is (2*y, 2*x).
// No normalisation factor is applied here.
//
// S must be even and must divide kBlockDim (enforced by the static_asserts).
template <size_t S>
67
10.9k
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
10.9k
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
10.9k
  static_assert(S % 2 == 0, "S should be even");
  // All results are staged in `temp` and copied out only after every read
  // from `block` has happened.
  // NOTE(review): presumably this is what allows `out` to alias `block` --
  // confirm against callers.
70
10.9k
  float temp[kDCTBlockSize];
71
10.9k
  constexpr size_t num_2x2 = S / 2;
72
36.5k
  for (size_t y = 0; y < num_2x2; y++) {
73
102k
    for (size_t x = 0; x < num_2x2; x++) {
      // c00/c01/c10/c11: the value at (y, x) in the top-left, top-right,
      // bottom-left and bottom-right quadrant respectively.
74
76.7k
      float c00 = block[y * kBlockDim + x];
75
76.7k
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
76.7k
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
76.7k
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
76.7k
      float r00 = c00 + c01 + c10 + c11;
79
76.7k
      float r01 = c00 + c01 - c10 - c11;
80
76.7k
      float r10 = c00 - c01 + c10 - c11;
81
76.7k
      float r11 = c00 - c01 - c10 + c11;
      // Store the four results as a 2x2 cell at rows 2y..2y+1, cols 2x..2x+1.
82
76.7k
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
76.7k
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
76.7k
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
76.7k
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
76.7k
    }
87
25.5k
  }
  // Copy the SxS staged result to the destination; only this corner of `out`
  // is written.
88
62.1k
  for (size_t y = 0; y < S; y++) {
89
358k
    for (size_t x = 0; x < S; x++) {
90
306k
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
306k
    }
92
51.1k
  }
93
10.9k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
3.65k
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
3.65k
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
3.65k
  static_assert(S % 2 == 0, "S should be even");
70
3.65k
  float temp[kDCTBlockSize];
71
3.65k
  constexpr size_t num_2x2 = S / 2;
72
7.30k
  for (size_t y = 0; y < num_2x2; y++) {
73
7.30k
    for (size_t x = 0; x < num_2x2; x++) {
74
3.65k
      float c00 = block[y * kBlockDim + x];
75
3.65k
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
3.65k
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
3.65k
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
3.65k
      float r00 = c00 + c01 + c10 + c11;
79
3.65k
      float r01 = c00 + c01 - c10 - c11;
80
3.65k
      float r10 = c00 - c01 + c10 - c11;
81
3.65k
      float r11 = c00 - c01 - c10 + c11;
82
3.65k
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
3.65k
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
3.65k
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
3.65k
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
3.65k
    }
87
3.65k
  }
88
10.9k
  for (size_t y = 0; y < S; y++) {
89
21.9k
    for (size_t x = 0; x < S; x++) {
90
14.6k
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
14.6k
    }
92
7.30k
  }
93
3.65k
}
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
3.65k
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
3.65k
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
3.65k
  static_assert(S % 2 == 0, "S should be even");
70
3.65k
  float temp[kDCTBlockSize];
71
3.65k
  constexpr size_t num_2x2 = S / 2;
72
10.9k
  for (size_t y = 0; y < num_2x2; y++) {
73
21.9k
    for (size_t x = 0; x < num_2x2; x++) {
74
14.6k
      float c00 = block[y * kBlockDim + x];
75
14.6k
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
14.6k
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
14.6k
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
14.6k
      float r00 = c00 + c01 + c10 + c11;
79
14.6k
      float r01 = c00 + c01 - c10 - c11;
80
14.6k
      float r10 = c00 - c01 + c10 - c11;
81
14.6k
      float r11 = c00 - c01 - c10 + c11;
82
14.6k
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
14.6k
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
14.6k
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
14.6k
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
14.6k
    }
87
7.30k
  }
88
18.2k
  for (size_t y = 0; y < S; y++) {
89
73.0k
    for (size_t x = 0; x < S; x++) {
90
58.4k
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
58.4k
    }
92
14.6k
  }
93
3.65k
}
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
3.65k
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
3.65k
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
3.65k
  static_assert(S % 2 == 0, "S should be even");
70
3.65k
  float temp[kDCTBlockSize];
71
3.65k
  constexpr size_t num_2x2 = S / 2;
72
18.2k
  for (size_t y = 0; y < num_2x2; y++) {
73
73.0k
    for (size_t x = 0; x < num_2x2; x++) {
74
58.4k
      float c00 = block[y * kBlockDim + x];
75
58.4k
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
58.4k
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
58.4k
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
58.4k
      float r00 = c00 + c01 + c10 + c11;
79
58.4k
      float r01 = c00 + c01 - c10 - c11;
80
58.4k
      float r10 = c00 - c01 + c10 - c11;
81
58.4k
      float r11 = c00 - c01 - c10 + c11;
82
58.4k
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
58.4k
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
58.4k
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
58.4k
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
58.4k
    }
87
14.6k
  }
88
32.8k
  for (size_t y = 0; y < S; y++) {
89
263k
    for (size_t x = 0; x < S; x++) {
90
233k
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
233k
    }
92
29.2k
  }
93
3.65k
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
94
95
7.91k
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
7.91k
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
7.91k
      {
98
7.91k
          0.25,
99
7.91k
          0.25,
100
7.91k
          0.25,
101
7.91k
          0.25,
102
7.91k
          0.25,
103
7.91k
          0.25,
104
7.91k
          0.25,
105
7.91k
          0.25,
106
7.91k
          0.25,
107
7.91k
          0.25,
108
7.91k
          0.25,
109
7.91k
          0.25,
110
7.91k
          0.25,
111
7.91k
          0.25,
112
7.91k
          0.25,
113
7.91k
          0.25,
114
7.91k
      },
115
7.91k
      {
116
7.91k
          0.876902929799142f,
117
7.91k
          0.2206518106944235f,
118
7.91k
          -0.10140050393753763f,
119
7.91k
          -0.1014005039375375f,
120
7.91k
          0.2206518106944236f,
121
7.91k
          -0.10140050393753777f,
122
7.91k
          -0.10140050393753772f,
123
7.91k
          -0.10140050393753763f,
124
7.91k
          -0.10140050393753758f,
125
7.91k
          -0.10140050393753769f,
126
7.91k
          -0.1014005039375375f,
127
7.91k
          -0.10140050393753768f,
128
7.91k
          -0.10140050393753768f,
129
7.91k
          -0.10140050393753759f,
130
7.91k
          -0.10140050393753763f,
131
7.91k
          -0.10140050393753741f,
132
7.91k
      },
133
7.91k
      {
134
7.91k
          0.0,
135
7.91k
          0.0,
136
7.91k
          0.40670075830260755f,
137
7.91k
          0.44444816619734445f,
138
7.91k
          0.0,
139
7.91k
          0.0,
140
7.91k
          0.19574399372042936f,
141
7.91k
          0.2929100136981264f,
142
7.91k
          -0.40670075830260716f,
143
7.91k
          -0.19574399372042872f,
144
7.91k
          0.0,
145
7.91k
          0.11379074460448091f,
146
7.91k
          -0.44444816619734384f,
147
7.91k
          -0.29291001369812636f,
148
7.91k
          -0.1137907446044814f,
149
7.91k
          0.0,
150
7.91k
      },
151
7.91k
      {
152
7.91k
          0.0,
153
7.91k
          0.0,
154
7.91k
          -0.21255748058288748f,
155
7.91k
          0.3085497062849767f,
156
7.91k
          0.0,
157
7.91k
          0.4706702258572536f,
158
7.91k
          -0.1621205195722993f,
159
7.91k
          0.0,
160
7.91k
          -0.21255748058287047f,
161
7.91k
          -0.16212051957228327f,
162
7.91k
          -0.47067022585725277f,
163
7.91k
          -0.1464291867126764f,
164
7.91k
          0.3085497062849487f,
165
7.91k
          0.0,
166
7.91k
          -0.14642918671266536f,
167
7.91k
          0.4251149611657548f,
168
7.91k
      },
169
7.91k
      {
170
7.91k
          0.0,
171
7.91k
          -0.7071067811865474f,
172
7.91k
          0.0,
173
7.91k
          0.0,
174
7.91k
          0.7071067811865476f,
175
7.91k
          0.0,
176
7.91k
          0.0,
177
7.91k
          0.0,
178
7.91k
          0.0,
179
7.91k
          0.0,
180
7.91k
          0.0,
181
7.91k
          0.0,
182
7.91k
          0.0,
183
7.91k
          0.0,
184
7.91k
          0.0,
185
7.91k
          0.0,
186
7.91k
      },
187
7.91k
      {
188
7.91k
          -0.4105377591765233f,
189
7.91k
          0.6235485373547691f,
190
7.91k
          -0.06435071657946274f,
191
7.91k
          -0.06435071657946266f,
192
7.91k
          0.6235485373547694f,
193
7.91k
          -0.06435071657946284f,
194
7.91k
          -0.0643507165794628f,
195
7.91k
          -0.06435071657946274f,
196
7.91k
          -0.06435071657946272f,
197
7.91k
          -0.06435071657946279f,
198
7.91k
          -0.06435071657946266f,
199
7.91k
          -0.06435071657946277f,
200
7.91k
          -0.06435071657946277f,
201
7.91k
          -0.06435071657946273f,
202
7.91k
          -0.06435071657946274f,
203
7.91k
          -0.0643507165794626f,
204
7.91k
      },
205
7.91k
      {
206
7.91k
          0.0,
207
7.91k
          0.0,
208
7.91k
          -0.4517556589999482f,
209
7.91k
          0.15854503551840063f,
210
7.91k
          0.0,
211
7.91k
          -0.04038515160822202f,
212
7.91k
          0.0074182263792423875f,
213
7.91k
          0.39351034269210167f,
214
7.91k
          -0.45175565899994635f,
215
7.91k
          0.007418226379244351f,
216
7.91k
          0.1107416575309343f,
217
7.91k
          0.08298163094882051f,
218
7.91k
          0.15854503551839705f,
219
7.91k
          0.3935103426921022f,
220
7.91k
          0.0829816309488214f,
221
7.91k
          -0.45175565899994796f,
222
7.91k
      },
223
7.91k
      {
224
7.91k
          0.0,
225
7.91k
          0.0,
226
7.91k
          -0.304684750724869f,
227
7.91k
          0.5112616136591823f,
228
7.91k
          0.0,
229
7.91k
          0.0,
230
7.91k
          -0.290480129728998f,
231
7.91k
          -0.06578701549142804f,
232
7.91k
          0.304684750724884f,
233
7.91k
          0.2904801297290076f,
234
7.91k
          0.0,
235
7.91k
          -0.23889773523344604f,
236
7.91k
          -0.5112616136592012f,
237
7.91k
          0.06578701549142545f,
238
7.91k
          0.23889773523345467f,
239
7.91k
          0.0,
240
7.91k
      },
241
7.91k
      {
242
7.91k
          0.0,
243
7.91k
          0.0,
244
7.91k
          0.3017929516615495f,
245
7.91k
          0.25792362796341184f,
246
7.91k
          0.0,
247
7.91k
          0.16272340142866204f,
248
7.91k
          0.09520022653475037f,
249
7.91k
          0.0,
250
7.91k
          0.3017929516615503f,
251
7.91k
          0.09520022653475055f,
252
7.91k
          -0.16272340142866173f,
253
7.91k
          -0.35312385449816297f,
254
7.91k
          0.25792362796341295f,
255
7.91k
          0.0,
256
7.91k
          -0.3531238544981624f,
257
7.91k
          -0.6035859033230976f,
258
7.91k
      },
259
7.91k
      {
260
7.91k
          0.0,
261
7.91k
          0.0,
262
7.91k
          0.40824829046386274f,
263
7.91k
          0.0,
264
7.91k
          0.0,
265
7.91k
          0.0,
266
7.91k
          0.0,
267
7.91k
          -0.4082482904638628f,
268
7.91k
          -0.4082482904638635f,
269
7.91k
          0.0,
270
7.91k
          0.0,
271
7.91k
          -0.40824829046386296f,
272
7.91k
          0.0,
273
7.91k
          0.4082482904638634f,
274
7.91k
          0.408248290463863f,
275
7.91k
          0.0,
276
7.91k
      },
277
7.91k
      {
278
7.91k
          0.0,
279
7.91k
          0.0,
280
7.91k
          0.1747866975480809f,
281
7.91k
          0.0812611176717539f,
282
7.91k
          0.0,
283
7.91k
          0.0,
284
7.91k
          -0.3675398009862027f,
285
7.91k
          -0.307882213957909f,
286
7.91k
          -0.17478669754808135f,
287
7.91k
          0.3675398009862011f,
288
7.91k
          0.0,
289
7.91k
          0.4826689115059883f,
290
7.91k
          -0.08126111767175039f,
291
7.91k
          0.30788221395790305f,
292
7.91k
          -0.48266891150598584f,
293
7.91k
          0.0,
294
7.91k
      },
295
7.91k
      {
296
7.91k
          0.0,
297
7.91k
          0.0,
298
7.91k
          -0.21105601049335784f,
299
7.91k
          0.18567180916109802f,
300
7.91k
          0.0,
301
7.91k
          0.0,
302
7.91k
          0.49215859013738733f,
303
7.91k
          -0.38525013709251915f,
304
7.91k
          0.21105601049335806f,
305
7.91k
          -0.49215859013738905f,
306
7.91k
          0.0,
307
7.91k
          0.17419412659916217f,
308
7.91k
          -0.18567180916109904f,
309
7.91k
          0.3852501370925211f,
310
7.91k
          -0.1741941265991621f,
311
7.91k
          0.0,
312
7.91k
      },
313
7.91k
      {
314
7.91k
          0.0,
315
7.91k
          0.0,
316
7.91k
          -0.14266084808807264f,
317
7.91k
          -0.3416446842253372f,
318
7.91k
          0.0,
319
7.91k
          0.7367497537172237f,
320
7.91k
          0.24627107722075148f,
321
7.91k
          -0.08574019035519306f,
322
7.91k
          -0.14266084808807344f,
323
7.91k
          0.24627107722075137f,
324
7.91k
          0.14883399227113567f,
325
7.91k
          -0.04768680350229251f,
326
7.91k
          -0.3416446842253373f,
327
7.91k
          -0.08574019035519267f,
328
7.91k
          -0.047686803502292804f,
329
7.91k
          -0.14266084808807242f,
330
7.91k
      },
331
7.91k
      {
332
7.91k
          0.0,
333
7.91k
          0.0,
334
7.91k
          -0.13813540350758585f,
335
7.91k
          0.3302282550303788f,
336
7.91k
          0.0,
337
7.91k
          0.08755115000587084f,
338
7.91k
          -0.07946706605909573f,
339
7.91k
          -0.4613374887461511f,
340
7.91k
          -0.13813540350758294f,
341
7.91k
          -0.07946706605910261f,
342
7.91k
          0.49724647109535086f,
343
7.91k
          0.12538059448563663f,
344
7.91k
          0.3302282550303805f,
345
7.91k
          -0.4613374887461554f,
346
7.91k
          0.12538059448564315f,
347
7.91k
          -0.13813540350758452f,
348
7.91k
      },
349
7.91k
      {
350
7.91k
          0.0,
351
7.91k
          0.0,
352
7.91k
          -0.17437602599651067f,
353
7.91k
          0.0702790691196284f,
354
7.91k
          0.0,
355
7.91k
          -0.2921026642334881f,
356
7.91k
          0.3623817333531167f,
357
7.91k
          0.0,
358
7.91k
          -0.1743760259965108f,
359
7.91k
          0.36238173335311646f,
360
7.91k
          0.29210266423348785f,
361
7.91k
          -0.4326608024727445f,
362
7.91k
          0.07027906911962818f,
363
7.91k
          0.0,
364
7.91k
          -0.4326608024727457f,
365
7.91k
          0.34875205199302267f,
366
7.91k
      },
367
7.91k
      {
368
7.91k
          0.0,
369
7.91k
          0.0,
370
7.91k
          0.11354987314994337f,
371
7.91k
          -0.07417504595810355f,
372
7.91k
          0.0,
373
7.91k
          0.19402893032594343f,
374
7.91k
          -0.435190496523228f,
375
7.91k
          0.21918684838857466f,
376
7.91k
          0.11354987314994257f,
377
7.91k
          -0.4351904965232251f,
378
7.91k
          0.5550443808910661f,
379
7.91k
          -0.25468277124066463f,
380
7.91k
          -0.07417504595810233f,
381
7.91k
          0.2191868483885728f,
382
7.91k
          -0.25468277124066413f,
383
7.91k
          0.1135498731499429f,
384
7.91k
      },
385
7.91k
  };
386
387
7.91k
  const HWY_CAPPED(float, 16) d;
388
134k
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
126k
    auto pixel = Zero(d);
390
2.15M
    for (size_t j = 0; j < 16; j++) {
391
2.02M
      auto cf = Set(d, coeffs[j]);
392
2.02M
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
2.02M
      pixel = MulAdd(cf, basis, pixel);
394
2.02M
    }
395
126k
    Store(pixel, d, pixels + i);
396
126k
  }
397
7.91k
}
Unexecuted instantiation: enc_group.cc:jxl::N_SCALAR::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
dec_group.cc:jxl::N_SCALAR::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Line
Count
Source
95
7.91k
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
7.91k
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
7.91k
      {
98
7.91k
          0.25,
99
7.91k
          0.25,
100
7.91k
          0.25,
101
7.91k
          0.25,
102
7.91k
          0.25,
103
7.91k
          0.25,
104
7.91k
          0.25,
105
7.91k
          0.25,
106
7.91k
          0.25,
107
7.91k
          0.25,
108
7.91k
          0.25,
109
7.91k
          0.25,
110
7.91k
          0.25,
111
7.91k
          0.25,
112
7.91k
          0.25,
113
7.91k
          0.25,
114
7.91k
      },
115
7.91k
      {
116
7.91k
          0.876902929799142f,
117
7.91k
          0.2206518106944235f,
118
7.91k
          -0.10140050393753763f,
119
7.91k
          -0.1014005039375375f,
120
7.91k
          0.2206518106944236f,
121
7.91k
          -0.10140050393753777f,
122
7.91k
          -0.10140050393753772f,
123
7.91k
          -0.10140050393753763f,
124
7.91k
          -0.10140050393753758f,
125
7.91k
          -0.10140050393753769f,
126
7.91k
          -0.1014005039375375f,
127
7.91k
          -0.10140050393753768f,
128
7.91k
          -0.10140050393753768f,
129
7.91k
          -0.10140050393753759f,
130
7.91k
          -0.10140050393753763f,
131
7.91k
          -0.10140050393753741f,
132
7.91k
      },
133
7.91k
      {
134
7.91k
          0.0,
135
7.91k
          0.0,
136
7.91k
          0.40670075830260755f,
137
7.91k
          0.44444816619734445f,
138
7.91k
          0.0,
139
7.91k
          0.0,
140
7.91k
          0.19574399372042936f,
141
7.91k
          0.2929100136981264f,
142
7.91k
          -0.40670075830260716f,
143
7.91k
          -0.19574399372042872f,
144
7.91k
          0.0,
145
7.91k
          0.11379074460448091f,
146
7.91k
          -0.44444816619734384f,
147
7.91k
          -0.29291001369812636f,
148
7.91k
          -0.1137907446044814f,
149
7.91k
          0.0,
150
7.91k
      },
151
7.91k
      {
152
7.91k
          0.0,
153
7.91k
          0.0,
154
7.91k
          -0.21255748058288748f,
155
7.91k
          0.3085497062849767f,
156
7.91k
          0.0,
157
7.91k
          0.4706702258572536f,
158
7.91k
          -0.1621205195722993f,
159
7.91k
          0.0,
160
7.91k
          -0.21255748058287047f,
161
7.91k
          -0.16212051957228327f,
162
7.91k
          -0.47067022585725277f,
163
7.91k
          -0.1464291867126764f,
164
7.91k
          0.3085497062849487f,
165
7.91k
          0.0,
166
7.91k
          -0.14642918671266536f,
167
7.91k
          0.4251149611657548f,
168
7.91k
      },
169
7.91k
      {
170
7.91k
          0.0,
171
7.91k
          -0.7071067811865474f,
172
7.91k
          0.0,
173
7.91k
          0.0,
174
7.91k
          0.7071067811865476f,
175
7.91k
          0.0,
176
7.91k
          0.0,
177
7.91k
          0.0,
178
7.91k
          0.0,
179
7.91k
          0.0,
180
7.91k
          0.0,
181
7.91k
          0.0,
182
7.91k
          0.0,
183
7.91k
          0.0,
184
7.91k
          0.0,
185
7.91k
          0.0,
186
7.91k
      },
187
7.91k
      {
188
7.91k
          -0.4105377591765233f,
189
7.91k
          0.6235485373547691f,
190
7.91k
          -0.06435071657946274f,
191
7.91k
          -0.06435071657946266f,
192
7.91k
          0.6235485373547694f,
193
7.91k
          -0.06435071657946284f,
194
7.91k
          -0.0643507165794628f,
195
7.91k
          -0.06435071657946274f,
196
7.91k
          -0.06435071657946272f,
197
7.91k
          -0.06435071657946279f,
198
7.91k
          -0.06435071657946266f,
199
7.91k
          -0.06435071657946277f,
200
7.91k
          -0.06435071657946277f,
201
7.91k
          -0.06435071657946273f,
202
7.91k
          -0.06435071657946274f,
203
7.91k
          -0.0643507165794626f,
204
7.91k
      },
205
7.91k
      {
206
7.91k
          0.0,
207
7.91k
          0.0,
208
7.91k
          -0.4517556589999482f,
209
7.91k
          0.15854503551840063f,
210
7.91k
          0.0,
211
7.91k
          -0.04038515160822202f,
212
7.91k
          0.0074182263792423875f,
213
7.91k
          0.39351034269210167f,
214
7.91k
          -0.45175565899994635f,
215
7.91k
          0.007418226379244351f,
216
7.91k
          0.1107416575309343f,
217
7.91k
          0.08298163094882051f,
218
7.91k
          0.15854503551839705f,
219
7.91k
          0.3935103426921022f,
220
7.91k
          0.0829816309488214f,
221
7.91k
          -0.45175565899994796f,
222
7.91k
      },
223
7.91k
      {
224
7.91k
          0.0,
225
7.91k
          0.0,
226
7.91k
          -0.304684750724869f,
227
7.91k
          0.5112616136591823f,
228
7.91k
          0.0,
229
7.91k
          0.0,
230
7.91k
          -0.290480129728998f,
231
7.91k
          -0.06578701549142804f,
232
7.91k
          0.304684750724884f,
233
7.91k
          0.2904801297290076f,
234
7.91k
          0.0,
235
7.91k
          -0.23889773523344604f,
236
7.91k
          -0.5112616136592012f,
237
7.91k
          0.06578701549142545f,
238
7.91k
          0.23889773523345467f,
239
7.91k
          0.0,
240
7.91k
      },
241
7.91k
      {
242
7.91k
          0.0,
243
7.91k
          0.0,
244
7.91k
          0.3017929516615495f,
245
7.91k
          0.25792362796341184f,
246
7.91k
          0.0,
247
7.91k
          0.16272340142866204f,
248
7.91k
          0.09520022653475037f,
249
7.91k
          0.0,
250
7.91k
          0.3017929516615503f,
251
7.91k
          0.09520022653475055f,
252
7.91k
          -0.16272340142866173f,
253
7.91k
          -0.35312385449816297f,
254
7.91k
          0.25792362796341295f,
255
7.91k
          0.0,
256
7.91k
          -0.3531238544981624f,
257
7.91k
          -0.6035859033230976f,
258
7.91k
      },
259
7.91k
      {
260
7.91k
          0.0,
261
7.91k
          0.0,
262
7.91k
          0.40824829046386274f,
263
7.91k
          0.0,
264
7.91k
          0.0,
265
7.91k
          0.0,
266
7.91k
          0.0,
267
7.91k
          -0.4082482904638628f,
268
7.91k
          -0.4082482904638635f,
269
7.91k
          0.0,
270
7.91k
          0.0,
271
7.91k
          -0.40824829046386296f,
272
7.91k
          0.0,
273
7.91k
          0.4082482904638634f,
274
7.91k
          0.408248290463863f,
275
7.91k
          0.0,
276
7.91k
      },
277
7.91k
      {
278
7.91k
          0.0,
279
7.91k
          0.0,
280
7.91k
          0.1747866975480809f,
281
7.91k
          0.0812611176717539f,
282
7.91k
          0.0,
283
7.91k
          0.0,
284
7.91k
          -0.3675398009862027f,
285
7.91k
          -0.307882213957909f,
286
7.91k
          -0.17478669754808135f,
287
7.91k
          0.3675398009862011f,
288
7.91k
          0.0,
289
7.91k
          0.4826689115059883f,
290
7.91k
          -0.08126111767175039f,
291
7.91k
          0.30788221395790305f,
292
7.91k
          -0.48266891150598584f,
293
7.91k
          0.0,
294
7.91k
      },
295
7.91k
      {
296
7.91k
          0.0,
297
7.91k
          0.0,
298
7.91k
          -0.21105601049335784f,
299
7.91k
          0.18567180916109802f,
300
7.91k
          0.0,
301
7.91k
          0.0,
302
7.91k
          0.49215859013738733f,
303
7.91k
          -0.38525013709251915f,
304
7.91k
          0.21105601049335806f,
305
7.91k
          -0.49215859013738905f,
306
7.91k
          0.0,
307
7.91k
          0.17419412659916217f,
308
7.91k
          -0.18567180916109904f,
309
7.91k
          0.3852501370925211f,
310
7.91k
          -0.1741941265991621f,
311
7.91k
          0.0,
312
7.91k
      },
313
7.91k
      {
314
7.91k
          0.0,
315
7.91k
          0.0,
316
7.91k
          -0.14266084808807264f,
317
7.91k
          -0.3416446842253372f,
318
7.91k
          0.0,
319
7.91k
          0.7367497537172237f,
320
7.91k
          0.24627107722075148f,
321
7.91k
          -0.08574019035519306f,
322
7.91k
          -0.14266084808807344f,
323
7.91k
          0.24627107722075137f,
324
7.91k
          0.14883399227113567f,
325
7.91k
          -0.04768680350229251f,
326
7.91k
          -0.3416446842253373f,
327
7.91k
          -0.08574019035519267f,
328
7.91k
          -0.047686803502292804f,
329
7.91k
          -0.14266084808807242f,
330
7.91k
      },
331
7.91k
      {
332
7.91k
          0.0,
333
7.91k
          0.0,
334
7.91k
          -0.13813540350758585f,
335
7.91k
          0.3302282550303788f,
336
7.91k
          0.0,
337
7.91k
          0.08755115000587084f,
338
7.91k
          -0.07946706605909573f,
339
7.91k
          -0.4613374887461511f,
340
7.91k
          -0.13813540350758294f,
341
7.91k
          -0.07946706605910261f,
342
7.91k
          0.49724647109535086f,
343
7.91k
          0.12538059448563663f,
344
7.91k
          0.3302282550303805f,
345
7.91k
          -0.4613374887461554f,
346
7.91k
          0.12538059448564315f,
347
7.91k
          -0.13813540350758452f,
348
7.91k
      },
349
7.91k
      {
350
7.91k
          0.0,
351
7.91k
          0.0,
352
7.91k
          -0.17437602599651067f,
353
7.91k
          0.0702790691196284f,
354
7.91k
          0.0,
355
7.91k
          -0.2921026642334881f,
356
7.91k
          0.3623817333531167f,
357
7.91k
          0.0,
358
7.91k
          -0.1743760259965108f,
359
7.91k
          0.36238173335311646f,
360
7.91k
          0.29210266423348785f,
361
7.91k
          -0.4326608024727445f,
362
7.91k
          0.07027906911962818f,
363
7.91k
          0.0,
364
7.91k
          -0.4326608024727457f,
365
7.91k
          0.34875205199302267f,
366
7.91k
      },
367
7.91k
      {
368
7.91k
          0.0,
369
7.91k
          0.0,
370
7.91k
          0.11354987314994337f,
371
7.91k
          -0.07417504595810355f,
372
7.91k
          0.0,
373
7.91k
          0.19402893032594343f,
374
7.91k
          -0.435190496523228f,
375
7.91k
          0.21918684838857466f,
376
7.91k
          0.11354987314994257f,
377
7.91k
          -0.4351904965232251f,
378
7.91k
          0.5550443808910661f,
379
7.91k
          -0.25468277124066463f,
380
7.91k
          -0.07417504595810233f,
381
7.91k
          0.2191868483885728f,
382
7.91k
          -0.25468277124066413f,
383
7.91k
          0.1135498731499429f,
384
7.91k
      },
385
7.91k
  };
386
387
7.91k
  const HWY_CAPPED(float, 16) d;
388
134k
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
126k
    auto pixel = Zero(d);
390
2.15M
    for (size_t j = 0; j < 16; j++) {
391
2.02M
      auto cf = Set(d, coeffs[j]);
392
2.02M
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
2.02M
      pixel = MulAdd(cf, basis, pixel);
394
2.02M
    }
395
126k
    Store(pixel, d, pixels + i);
396
126k
  }
397
7.91k
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SCALAR::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SCALAR::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
398
399
template <size_t afv_kind>
400
void AFVTransformToPixels(const float* JXL_RESTRICT coefficients,
401
7.91k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
7.91k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
7.91k
  size_t afv_x = afv_kind & 1;
404
7.91k
  size_t afv_y = afv_kind / 2;
405
7.91k
  float dcs[3] = {};
406
7.91k
  float block00 = coefficients[0];
407
7.91k
  float block01 = coefficients[1];
408
7.91k
  float block10 = coefficients[8];
409
7.91k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
7.91k
  dcs[1] = (block00 + block10 - block01);
411
7.91k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
7.91k
  HWY_ALIGN float coeff[4 * 4];
414
7.91k
  coeff[0] = dcs[0];
415
39.5k
  for (size_t iy = 0; iy < 4; iy++) {
416
158k
    for (size_t ix = 0; ix < 4; ix++) {
417
126k
      if (ix == 0 && iy == 0) continue;
418
118k
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
118k
    }
420
31.6k
  }
421
7.91k
  HWY_ALIGN float block[4 * 8];
422
7.91k
  AFVIDCT4x4(coeff, block);
423
39.5k
  for (size_t iy = 0; iy < 4; iy++) {
424
158k
    for (size_t ix = 0; ix < 4; ix++) {
425
126k
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
126k
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
126k
    }
428
31.6k
  }
429
  // IDCT4x4 in (odd, even) positions.
430
7.91k
  block[0] = dcs[1];
431
39.5k
  for (size_t iy = 0; iy < 4; iy++) {
432
158k
    for (size_t ix = 0; ix < 4; ix++) {
433
126k
      if (ix == 0 && iy == 0) continue;
434
118k
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
118k
    }
436
31.6k
  }
437
7.91k
  ComputeScaledIDCT<4, 4>()(
438
7.91k
      block,
439
7.91k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
7.91k
            pixels_stride),
441
7.91k
      scratch_space);
442
  // IDCT4x8.
443
7.91k
  block[0] = dcs[2];
444
39.5k
  for (size_t iy = 0; iy < 4; iy++) {
445
284k
    for (size_t ix = 0; ix < 8; ix++) {
446
253k
      if (ix == 0 && iy == 0) continue;
447
245k
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
245k
    }
449
31.6k
  }
450
7.91k
  ComputeScaledIDCT<4, 8>()(
451
7.91k
      block,
452
7.91k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
7.91k
      scratch_space);
454
7.91k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Line
Count
Source
401
66
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
66
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
66
  size_t afv_x = afv_kind & 1;
404
66
  size_t afv_y = afv_kind / 2;
405
66
  float dcs[3] = {};
406
66
  float block00 = coefficients[0];
407
66
  float block01 = coefficients[1];
408
66
  float block10 = coefficients[8];
409
66
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
66
  dcs[1] = (block00 + block10 - block01);
411
66
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
66
  HWY_ALIGN float coeff[4 * 4];
414
66
  coeff[0] = dcs[0];
415
330
  for (size_t iy = 0; iy < 4; iy++) {
416
1.32k
    for (size_t ix = 0; ix < 4; ix++) {
417
1.05k
      if (ix == 0 && iy == 0) continue;
418
990
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
990
    }
420
264
  }
421
66
  HWY_ALIGN float block[4 * 8];
422
66
  AFVIDCT4x4(coeff, block);
423
330
  for (size_t iy = 0; iy < 4; iy++) {
424
1.32k
    for (size_t ix = 0; ix < 4; ix++) {
425
1.05k
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
1.05k
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
1.05k
    }
428
264
  }
429
  // IDCT4x4 in (odd, even) positions.
430
66
  block[0] = dcs[1];
431
330
  for (size_t iy = 0; iy < 4; iy++) {
432
1.32k
    for (size_t ix = 0; ix < 4; ix++) {
433
1.05k
      if (ix == 0 && iy == 0) continue;
434
990
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
990
    }
436
264
  }
437
66
  ComputeScaledIDCT<4, 4>()(
438
66
      block,
439
66
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
66
            pixels_stride),
441
66
      scratch_space);
442
  // IDCT4x8.
443
66
  block[0] = dcs[2];
444
330
  for (size_t iy = 0; iy < 4; iy++) {
445
2.37k
    for (size_t ix = 0; ix < 8; ix++) {
446
2.11k
      if (ix == 0 && iy == 0) continue;
447
2.04k
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
2.04k
    }
449
264
  }
450
66
  ComputeScaledIDCT<4, 8>()(
451
66
      block,
452
66
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
66
      scratch_space);
454
66
}
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Line
Count
Source
401
33
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
33
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
33
  size_t afv_x = afv_kind & 1;
404
33
  size_t afv_y = afv_kind / 2;
405
33
  float dcs[3] = {};
406
33
  float block00 = coefficients[0];
407
33
  float block01 = coefficients[1];
408
33
  float block10 = coefficients[8];
409
33
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
33
  dcs[1] = (block00 + block10 - block01);
411
33
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
33
  HWY_ALIGN float coeff[4 * 4];
414
33
  coeff[0] = dcs[0];
415
165
  for (size_t iy = 0; iy < 4; iy++) {
416
660
    for (size_t ix = 0; ix < 4; ix++) {
417
528
      if (ix == 0 && iy == 0) continue;
418
495
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
495
    }
420
132
  }
421
33
  HWY_ALIGN float block[4 * 8];
422
33
  AFVIDCT4x4(coeff, block);
423
165
  for (size_t iy = 0; iy < 4; iy++) {
424
660
    for (size_t ix = 0; ix < 4; ix++) {
425
528
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
528
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
528
    }
428
132
  }
429
  // IDCT4x4 in (odd, even) positions.
430
33
  block[0] = dcs[1];
431
165
  for (size_t iy = 0; iy < 4; iy++) {
432
660
    for (size_t ix = 0; ix < 4; ix++) {
433
528
      if (ix == 0 && iy == 0) continue;
434
495
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
495
    }
436
132
  }
437
33
  ComputeScaledIDCT<4, 4>()(
438
33
      block,
439
33
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
33
            pixels_stride),
441
33
      scratch_space);
442
  // IDCT4x8.
443
33
  block[0] = dcs[2];
444
165
  for (size_t iy = 0; iy < 4; iy++) {
445
1.18k
    for (size_t ix = 0; ix < 8; ix++) {
446
1.05k
      if (ix == 0 && iy == 0) continue;
447
1.02k
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
1.02k
    }
449
132
  }
450
33
  ComputeScaledIDCT<4, 8>()(
451
33
      block,
452
33
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
33
      scratch_space);
454
33
}
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Line
Count
Source
401
450
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
450
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
450
  size_t afv_x = afv_kind & 1;
404
450
  size_t afv_y = afv_kind / 2;
405
450
  float dcs[3] = {};
406
450
  float block00 = coefficients[0];
407
450
  float block01 = coefficients[1];
408
450
  float block10 = coefficients[8];
409
450
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
450
  dcs[1] = (block00 + block10 - block01);
411
450
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
450
  HWY_ALIGN float coeff[4 * 4];
414
450
  coeff[0] = dcs[0];
415
2.25k
  for (size_t iy = 0; iy < 4; iy++) {
416
9.00k
    for (size_t ix = 0; ix < 4; ix++) {
417
7.20k
      if (ix == 0 && iy == 0) continue;
418
6.75k
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
6.75k
    }
420
1.80k
  }
421
450
  HWY_ALIGN float block[4 * 8];
422
450
  AFVIDCT4x4(coeff, block);
423
2.25k
  for (size_t iy = 0; iy < 4; iy++) {
424
9.00k
    for (size_t ix = 0; ix < 4; ix++) {
425
7.20k
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
7.20k
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
7.20k
    }
428
1.80k
  }
429
  // IDCT4x4 in (odd, even) positions.
430
450
  block[0] = dcs[1];
431
2.25k
  for (size_t iy = 0; iy < 4; iy++) {
432
9.00k
    for (size_t ix = 0; ix < 4; ix++) {
433
7.20k
      if (ix == 0 && iy == 0) continue;
434
6.75k
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
6.75k
    }
436
1.80k
  }
437
450
  ComputeScaledIDCT<4, 4>()(
438
450
      block,
439
450
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
450
            pixels_stride),
441
450
      scratch_space);
442
  // IDCT4x8.
443
450
  block[0] = dcs[2];
444
2.25k
  for (size_t iy = 0; iy < 4; iy++) {
445
16.2k
    for (size_t ix = 0; ix < 8; ix++) {
446
14.4k
      if (ix == 0 && iy == 0) continue;
447
13.9k
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
13.9k
    }
449
1.80k
  }
450
450
  ComputeScaledIDCT<4, 8>()(
451
450
      block,
452
450
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
450
      scratch_space);
454
450
}
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Line
Count
Source
401
7.36k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
7.36k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
7.36k
  size_t afv_x = afv_kind & 1;
404
7.36k
  size_t afv_y = afv_kind / 2;
405
7.36k
  float dcs[3] = {};
406
7.36k
  float block00 = coefficients[0];
407
7.36k
  float block01 = coefficients[1];
408
7.36k
  float block10 = coefficients[8];
409
7.36k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
7.36k
  dcs[1] = (block00 + block10 - block01);
411
7.36k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
7.36k
  HWY_ALIGN float coeff[4 * 4];
414
7.36k
  coeff[0] = dcs[0];
415
36.8k
  for (size_t iy = 0; iy < 4; iy++) {
416
147k
    for (size_t ix = 0; ix < 4; ix++) {
417
117k
      if (ix == 0 && iy == 0) continue;
418
110k
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
110k
    }
420
29.4k
  }
421
7.36k
  HWY_ALIGN float block[4 * 8];
422
7.36k
  AFVIDCT4x4(coeff, block);
423
36.8k
  for (size_t iy = 0; iy < 4; iy++) {
424
147k
    for (size_t ix = 0; ix < 4; ix++) {
425
117k
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
117k
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
117k
    }
428
29.4k
  }
429
  // IDCT4x4 in (odd, even) positions.
430
7.36k
  block[0] = dcs[1];
431
36.8k
  for (size_t iy = 0; iy < 4; iy++) {
432
147k
    for (size_t ix = 0; ix < 4; ix++) {
433
117k
      if (ix == 0 && iy == 0) continue;
434
110k
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
110k
    }
436
29.4k
  }
437
7.36k
  ComputeScaledIDCT<4, 4>()(
438
7.36k
      block,
439
7.36k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
7.36k
            pixels_stride),
441
7.36k
      scratch_space);
442
  // IDCT4x8.
443
7.36k
  block[0] = dcs[2];
444
36.8k
  for (size_t iy = 0; iy < 4; iy++) {
445
265k
    for (size_t ix = 0; ix < 8; ix++) {
446
235k
      if (ix == 0 && iy == 0) continue;
447
228k
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
228k
    }
449
29.4k
  }
450
7.36k
  ComputeScaledIDCT<4, 8>()(
451
7.36k
      block,
452
7.36k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
7.36k
      scratch_space);
454
7.36k
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
455
456
HWY_MAYBE_UNUSED void TransformToPixels(const AcStrategyType strategy,
457
                                        float* JXL_RESTRICT coefficients,
458
                                        float* JXL_RESTRICT pixels,
459
                                        size_t pixels_stride,
460
124k
                                        float* scratch_space) {
461
124k
  using Type = AcStrategyType;
462
124k
  switch (strategy) {
463
19.9k
    case Type::IDENTITY: {
464
19.9k
      float dcs[4] = {};
465
19.9k
      float block00 = coefficients[0];
466
19.9k
      float block01 = coefficients[1];
467
19.9k
      float block10 = coefficients[8];
468
19.9k
      float block11 = coefficients[9];
469
19.9k
      dcs[0] = block00 + block01 + block10 + block11;
470
19.9k
      dcs[1] = block00 + block01 - block10 - block11;
471
19.9k
      dcs[2] = block00 - block01 + block10 - block11;
472
19.9k
      dcs[3] = block00 - block01 - block10 + block11;
473
59.7k
      for (size_t y = 0; y < 2; y++) {
474
119k
        for (size_t x = 0; x < 2; x++) {
475
79.7k
          float block_dc = dcs[y * 2 + x];
476
79.7k
          float residual_sum = 0;
477
398k
          for (size_t iy = 0; iy < 4; iy++) {
478
1.59M
            for (size_t ix = 0; ix < 4; ix++) {
479
1.27M
              if (ix == 0 && iy == 0) continue;
480
1.19M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
1.19M
            }
482
318k
          }
483
79.7k
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
79.7k
              block_dc - residual_sum * (1.0f / 16);
485
398k
          for (size_t iy = 0; iy < 4; iy++) {
486
1.59M
            for (size_t ix = 0; ix < 4; ix++) {
487
1.27M
              if (ix == 1 && iy == 1) continue;
488
1.19M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
1.19M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
1.19M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
1.19M
            }
492
318k
          }
493
79.7k
          pixels[y * 4 * pixels_stride + x * 4] =
494
79.7k
              coefficients[(y + 2) * 8 + x + 2] +
495
79.7k
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
79.7k
        }
497
39.8k
      }
498
19.9k
      break;
499
0
    }
500
903
    case Type::DCT8X4: {
501
903
      float dcs[2] = {};
502
903
      float block0 = coefficients[0];
503
903
      float block1 = coefficients[8];
504
903
      dcs[0] = block0 + block1;
505
903
      dcs[1] = block0 - block1;
506
2.70k
      for (size_t x = 0; x < 2; x++) {
507
1.80k
        HWY_ALIGN float block[4 * 8];
508
1.80k
        block[0] = dcs[x];
509
9.03k
        for (size_t iy = 0; iy < 4; iy++) {
510
65.0k
          for (size_t ix = 0; ix < 8; ix++) {
511
57.7k
            if (ix == 0 && iy == 0) continue;
512
55.9k
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
55.9k
          }
514
7.22k
        }
515
1.80k
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
1.80k
                                  scratch_space);
517
1.80k
      }
518
903
      break;
519
0
    }
520
759
    case Type::DCT4X8: {
521
759
      float dcs[2] = {};
522
759
      float block0 = coefficients[0];
523
759
      float block1 = coefficients[8];
524
759
      dcs[0] = block0 + block1;
525
759
      dcs[1] = block0 - block1;
526
2.27k
      for (size_t y = 0; y < 2; y++) {
527
1.51k
        HWY_ALIGN float block[4 * 8];
528
1.51k
        block[0] = dcs[y];
529
7.59k
        for (size_t iy = 0; iy < 4; iy++) {
530
54.6k
          for (size_t ix = 0; ix < 8; ix++) {
531
48.5k
            if (ix == 0 && iy == 0) continue;
532
47.0k
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
47.0k
          }
534
6.07k
        }
535
1.51k
        ComputeScaledIDCT<4, 8>()(
536
1.51k
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
1.51k
            scratch_space);
538
1.51k
      }
539
759
      break;
540
0
    }
541
0
    case Type::DCT4X4: {
542
0
      float dcs[4] = {};
543
0
      float block00 = coefficients[0];
544
0
      float block01 = coefficients[1];
545
0
      float block10 = coefficients[8];
546
0
      float block11 = coefficients[9];
547
0
      dcs[0] = block00 + block01 + block10 + block11;
548
0
      dcs[1] = block00 + block01 - block10 - block11;
549
0
      dcs[2] = block00 - block01 + block10 - block11;
550
0
      dcs[3] = block00 - block01 - block10 + block11;
551
0
      for (size_t y = 0; y < 2; y++) {
552
0
        for (size_t x = 0; x < 2; x++) {
553
0
          HWY_ALIGN float block[4 * 4];
554
0
          block[0] = dcs[y * 2 + x];
555
0
          for (size_t iy = 0; iy < 4; iy++) {
556
0
            for (size_t ix = 0; ix < 4; ix++) {
557
0
              if (ix == 0 && iy == 0) continue;
558
0
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
0
            }
560
0
          }
561
0
          ComputeScaledIDCT<4, 4>()(
562
0
              block,
563
0
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
0
              scratch_space);
565
0
        }
566
0
      }
567
0
      break;
568
0
    }
569
3.65k
    case Type::DCT2X2: {
570
3.65k
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
3.65k
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
3.65k
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
3.65k
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
3.65k
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
32.8k
      for (size_t y = 0; y < kBlockDim; y++) {
576
263k
        for (size_t x = 0; x < kBlockDim; x++) {
577
233k
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
233k
        }
579
29.2k
      }
580
3.65k
      break;
581
0
    }
582
4.67k
    case Type::DCT16X16: {
583
4.67k
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
4.67k
                                  scratch_space);
585
4.67k
      break;
586
0
    }
587
402
    case Type::DCT16X8: {
588
402
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
402
                                 scratch_space);
590
402
      break;
591
0
    }
592
67.1k
    case Type::DCT8X16: {
593
67.1k
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
67.1k
                                 scratch_space);
595
67.1k
      break;
596
0
    }
597
588
    case Type::DCT32X8: {
598
588
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
588
                                 scratch_space);
600
588
      break;
601
0
    }
602
0
    case Type::DCT8X32: {
603
0
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
0
                                 scratch_space);
605
0
      break;
606
0
    }
607
219
    case Type::DCT32X16: {
608
219
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
219
                                  scratch_space);
610
219
      break;
611
0
    }
612
132
    case Type::DCT16X32: {
613
132
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
132
                                  scratch_space);
615
132
      break;
616
0
    }
617
120
    case Type::DCT32X32: {
618
120
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
120
                                  scratch_space);
620
120
      break;
621
0
    }
622
17.6k
    case Type::DCT: {
623
17.6k
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
17.6k
                                scratch_space);
625
17.6k
      break;
626
0
    }
627
66
    case Type::AFV0: {
628
66
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
66
      break;
630
0
    }
631
33
    case Type::AFV1: {
632
33
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
33
      break;
634
0
    }
635
450
    case Type::AFV2: {
636
450
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
450
      break;
638
0
    }
639
7.36k
    case Type::AFV3: {
640
7.36k
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
7.36k
      break;
642
0
    }
643
0
    case Type::DCT64X32: {
644
0
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
63
    case Type::DCT32X64: {
649
63
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
63
                                  scratch_space);
651
63
      break;
652
0
    }
653
354
    case Type::DCT64X64: {
654
354
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
354
                                  scratch_space);
656
354
      break;
657
0
    }
658
123
    case Type::DCT128X64: {
659
123
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
123
                                   scratch_space);
661
123
      break;
662
0
    }
663
0
    case Type::DCT64X128: {
664
0
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
0
    case Type::DCT128X128: {
669
0
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
0
                                    scratch_space);
671
0
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
12
    case Type::DCT128X256: {
679
12
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
12
                                    scratch_space);
681
12
      break;
682
0
    }
683
9
    case Type::DCT256X256: {
684
9
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
9
                                    scratch_space);
686
9
      break;
687
0
    }
688
124k
  }
689
124k
}
Unexecuted instantiation: enc_group.cc:jxl::N_SCALAR::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
dec_group.cc:jxl::N_SCALAR::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Line
Count
Source
460
124k
                                        float* scratch_space) {
461
124k
  using Type = AcStrategyType;
462
124k
  switch (strategy) {
463
19.9k
    case Type::IDENTITY: {
464
19.9k
      float dcs[4] = {};
465
19.9k
      float block00 = coefficients[0];
466
19.9k
      float block01 = coefficients[1];
467
19.9k
      float block10 = coefficients[8];
468
19.9k
      float block11 = coefficients[9];
469
19.9k
      dcs[0] = block00 + block01 + block10 + block11;
470
19.9k
      dcs[1] = block00 + block01 - block10 - block11;
471
19.9k
      dcs[2] = block00 - block01 + block10 - block11;
472
19.9k
      dcs[3] = block00 - block01 - block10 + block11;
473
59.7k
      for (size_t y = 0; y < 2; y++) {
474
119k
        for (size_t x = 0; x < 2; x++) {
475
79.7k
          float block_dc = dcs[y * 2 + x];
476
79.7k
          float residual_sum = 0;
477
398k
          for (size_t iy = 0; iy < 4; iy++) {
478
1.59M
            for (size_t ix = 0; ix < 4; ix++) {
479
1.27M
              if (ix == 0 && iy == 0) continue;
480
1.19M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
1.19M
            }
482
318k
          }
483
79.7k
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
79.7k
              block_dc - residual_sum * (1.0f / 16);
485
398k
          for (size_t iy = 0; iy < 4; iy++) {
486
1.59M
            for (size_t ix = 0; ix < 4; ix++) {
487
1.27M
              if (ix == 1 && iy == 1) continue;
488
1.19M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
1.19M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
1.19M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
1.19M
            }
492
318k
          }
493
79.7k
          pixels[y * 4 * pixels_stride + x * 4] =
494
79.7k
              coefficients[(y + 2) * 8 + x + 2] +
495
79.7k
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
79.7k
        }
497
39.8k
      }
498
19.9k
      break;
499
0
    }
500
903
    case Type::DCT8X4: {
501
903
      float dcs[2] = {};
502
903
      float block0 = coefficients[0];
503
903
      float block1 = coefficients[8];
504
903
      dcs[0] = block0 + block1;
505
903
      dcs[1] = block0 - block1;
506
2.70k
      for (size_t x = 0; x < 2; x++) {
507
1.80k
        HWY_ALIGN float block[4 * 8];
508
1.80k
        block[0] = dcs[x];
509
9.03k
        for (size_t iy = 0; iy < 4; iy++) {
510
65.0k
          for (size_t ix = 0; ix < 8; ix++) {
511
57.7k
            if (ix == 0 && iy == 0) continue;
512
55.9k
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
55.9k
          }
514
7.22k
        }
515
1.80k
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
1.80k
                                  scratch_space);
517
1.80k
      }
518
903
      break;
519
0
    }
520
759
    case Type::DCT4X8: {
521
759
      float dcs[2] = {};
522
759
      float block0 = coefficients[0];
523
759
      float block1 = coefficients[8];
524
759
      dcs[0] = block0 + block1;
525
759
      dcs[1] = block0 - block1;
526
2.27k
      for (size_t y = 0; y < 2; y++) {
527
1.51k
        HWY_ALIGN float block[4 * 8];
528
1.51k
        block[0] = dcs[y];
529
7.59k
        for (size_t iy = 0; iy < 4; iy++) {
530
54.6k
          for (size_t ix = 0; ix < 8; ix++) {
531
48.5k
            if (ix == 0 && iy == 0) continue;
532
47.0k
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
47.0k
          }
534
6.07k
        }
535
1.51k
        ComputeScaledIDCT<4, 8>()(
536
1.51k
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
1.51k
            scratch_space);
538
1.51k
      }
539
759
      break;
540
0
    }
541
0
    case Type::DCT4X4: {
542
0
      float dcs[4] = {};
543
0
      float block00 = coefficients[0];
544
0
      float block01 = coefficients[1];
545
0
      float block10 = coefficients[8];
546
0
      float block11 = coefficients[9];
547
0
      dcs[0] = block00 + block01 + block10 + block11;
548
0
      dcs[1] = block00 + block01 - block10 - block11;
549
0
      dcs[2] = block00 - block01 + block10 - block11;
550
0
      dcs[3] = block00 - block01 - block10 + block11;
551
0
      for (size_t y = 0; y < 2; y++) {
552
0
        for (size_t x = 0; x < 2; x++) {
553
0
          HWY_ALIGN float block[4 * 4];
554
0
          block[0] = dcs[y * 2 + x];
555
0
          for (size_t iy = 0; iy < 4; iy++) {
556
0
            for (size_t ix = 0; ix < 4; ix++) {
557
0
              if (ix == 0 && iy == 0) continue;
558
0
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
0
            }
560
0
          }
561
0
          ComputeScaledIDCT<4, 4>()(
562
0
              block,
563
0
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
0
              scratch_space);
565
0
        }
566
0
      }
567
0
      break;
568
0
    }
569
3.65k
    case Type::DCT2X2: {
570
3.65k
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
3.65k
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
3.65k
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
3.65k
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
3.65k
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
32.8k
      for (size_t y = 0; y < kBlockDim; y++) {
576
263k
        for (size_t x = 0; x < kBlockDim; x++) {
577
233k
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
233k
        }
579
29.2k
      }
580
3.65k
      break;
581
0
    }
582
4.67k
    case Type::DCT16X16: {
583
4.67k
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
4.67k
                                  scratch_space);
585
4.67k
      break;
586
0
    }
587
402
    case Type::DCT16X8: {
588
402
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
402
                                 scratch_space);
590
402
      break;
591
0
    }
592
67.1k
    case Type::DCT8X16: {
593
67.1k
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
67.1k
                                 scratch_space);
595
67.1k
      break;
596
0
    }
597
588
    case Type::DCT32X8: {
598
588
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
588
                                 scratch_space);
600
588
      break;
601
0
    }
602
0
    case Type::DCT8X32: {
603
0
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
0
                                 scratch_space);
605
0
      break;
606
0
    }
607
219
    case Type::DCT32X16: {
608
219
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
219
                                  scratch_space);
610
219
      break;
611
0
    }
612
132
    case Type::DCT16X32: {
613
132
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
132
                                  scratch_space);
615
132
      break;
616
0
    }
617
120
    case Type::DCT32X32: {
618
120
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
120
                                  scratch_space);
620
120
      break;
621
0
    }
622
17.6k
    case Type::DCT: {
623
17.6k
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
17.6k
                                scratch_space);
625
17.6k
      break;
626
0
    }
627
66
    case Type::AFV0: {
628
66
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
66
      break;
630
0
    }
631
33
    case Type::AFV1: {
632
33
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
33
      break;
634
0
    }
635
450
    case Type::AFV2: {
636
450
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
450
      break;
638
0
    }
639
7.36k
    case Type::AFV3: {
640
7.36k
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
7.36k
      break;
642
0
    }
643
0
    case Type::DCT64X32: {
644
0
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
63
    case Type::DCT32X64: {
649
63
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
63
                                  scratch_space);
651
63
      break;
652
0
    }
653
354
    case Type::DCT64X64: {
654
354
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
354
                                  scratch_space);
656
354
      break;
657
0
    }
658
123
    case Type::DCT128X64: {
659
123
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
123
                                   scratch_space);
661
123
      break;
662
0
    }
663
0
    case Type::DCT64X128: {
664
0
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
0
    case Type::DCT128X128: {
669
0
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
0
                                    scratch_space);
671
0
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
12
    case Type::DCT128X256: {
679
12
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
12
                                    scratch_space);
681
12
      break;
682
0
    }
683
9
    case Type::DCT256X256: {
684
9
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
9
                                    scratch_space);
686
9
      break;
687
0
    }
688
124k
  }
689
124k
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SCALAR::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SCALAR::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
690
691
HWY_MAYBE_UNUSED void LowestFrequenciesFromDC(const AcStrategyType strategy,
692
                                              const float* dc, size_t dc_stride,
693
                                              float* llf,
694
124k
                                              float* JXL_RESTRICT scratch) {
695
124k
  using Type = AcStrategyType;
696
124k
  HWY_ALIGN float warm_block[4 * 4];
697
124k
  HWY_ALIGN float warm_scratch_space[4 * 4 * 4];
698
124k
  switch (strategy) {
699
402
    case Type::DCT16X8: {
700
402
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
701
402
                        /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
702
402
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
703
402
      break;
704
0
    }
705
66.8k
    case Type::DCT8X16: {
706
66.8k
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
707
66.8k
                        /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
708
66.8k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
709
66.8k
      break;
710
0
    }
711
4.66k
    case Type::DCT16X16: {
712
4.66k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
713
4.66k
                        /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
714
4.66k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
715
4.66k
      break;
716
0
    }
717
588
    case Type::DCT32X8: {
718
588
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
719
588
                        /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
720
588
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
721
588
      break;
722
0
    }
723
0
    case Type::DCT8X32: {
724
0
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
725
0
                        /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
726
0
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
727
0
      break;
728
0
    }
729
219
    case Type::DCT32X16: {
730
219
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
731
219
                        /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
732
219
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
733
219
      break;
734
0
    }
735
132
    case Type::DCT16X32: {
736
132
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
737
132
                        /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
738
132
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
739
132
      break;
740
0
    }
741
120
    case Type::DCT32X32: {
742
120
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
743
120
                        /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
744
120
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
745
120
      break;
746
0
    }
747
0
    case Type::DCT64X32: {
748
0
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
749
0
                        /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
750
0
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4);
751
0
      break;
752
0
    }
753
63
    case Type::DCT32X64: {
754
63
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
755
63
                        /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
756
63
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8);
757
63
      break;
758
0
    }
759
354
    case Type::DCT64X64: {
760
354
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
761
354
                        /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
762
354
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8);
763
354
      break;
764
0
    }
765
123
    case Type::DCT128X64: {
766
123
      ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
767
123
                        /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
768
123
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8);
769
123
      break;
770
0
    }
771
0
    case Type::DCT64X128: {
772
0
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
773
0
                        /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
774
0
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16);
775
0
      break;
776
0
    }
777
0
    case Type::DCT128X128: {
778
0
      ReinterpretingDCT<
779
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
780
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
781
0
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16);
782
0
      break;
783
0
    }
784
0
    case Type::DCT256X128: {
785
0
      ReinterpretingDCT<
786
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
787
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
788
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16);
789
0
      break;
790
0
    }
791
12
    case Type::DCT128X256: {
792
12
      ReinterpretingDCT<
793
12
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
794
12
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
795
12
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32);
796
12
      break;
797
0
    }
798
9
    case Type::DCT256X256: {
799
9
      ReinterpretingDCT<
800
9
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
801
9
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
802
9
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32);
803
9
      break;
804
0
    }
805
17.6k
    case Type::DCT:
806
21.3k
    case Type::DCT2X2:
807
21.3k
    case Type::DCT4X4:
808
22.0k
    case Type::DCT4X8:
809
22.9k
    case Type::DCT8X4:
810
23.0k
    case Type::AFV0:
811
23.0k
    case Type::AFV1:
812
23.5k
    case Type::AFV2:
813
30.9k
    case Type::AFV3:
814
50.8k
    case Type::IDENTITY:
815
50.8k
      llf[0] = dc[0];
816
50.8k
      break;
817
124k
  };
818
124k
}
Unexecuted instantiation: enc_group.cc:jxl::N_SCALAR::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
dec_group.cc:jxl::N_SCALAR::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
694
124k
                                              float* JXL_RESTRICT scratch) {
695
124k
  using Type = AcStrategyType;
696
124k
  HWY_ALIGN float warm_block[4 * 4];
697
124k
  HWY_ALIGN float warm_scratch_space[4 * 4 * 4];
698
124k
  switch (strategy) {
699
402
    case Type::DCT16X8: {
700
402
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
701
402
                        /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
702
402
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
703
402
      break;
704
0
    }
705
66.8k
    case Type::DCT8X16: {
706
66.8k
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
707
66.8k
                        /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
708
66.8k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
709
66.8k
      break;
710
0
    }
711
4.66k
    case Type::DCT16X16: {
712
4.66k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
713
4.66k
                        /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
714
4.66k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
715
4.66k
      break;
716
0
    }
717
588
    case Type::DCT32X8: {
718
588
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
719
588
                        /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
720
588
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
721
588
      break;
722
0
    }
723
0
    case Type::DCT8X32: {
724
0
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
725
0
                        /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
726
0
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
727
0
      break;
728
0
    }
729
219
    case Type::DCT32X16: {
730
219
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
731
219
                        /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
732
219
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
733
219
      break;
734
0
    }
735
132
    case Type::DCT16X32: {
736
132
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
737
132
                        /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
738
132
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
739
132
      break;
740
0
    }
741
120
    case Type::DCT32X32: {
742
120
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
743
120
                        /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
744
120
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
745
120
      break;
746
0
    }
747
0
    case Type::DCT64X32: {
748
0
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
749
0
                        /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
750
0
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4);
751
0
      break;
752
0
    }
753
63
    case Type::DCT32X64: {
754
63
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
755
63
                        /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
756
63
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8);
757
63
      break;
758
0
    }
759
354
    case Type::DCT64X64: {
760
354
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
761
354
                        /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
762
354
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8);
763
354
      break;
764
0
    }
765
123
    case Type::DCT128X64: {
766
123
      ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
767
123
                        /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
768
123
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8);
769
123
      break;
770
0
    }
771
0
    case Type::DCT64X128: {
772
0
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
773
0
                        /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
774
0
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16);
775
0
      break;
776
0
    }
777
0
    case Type::DCT128X128: {
778
0
      ReinterpretingDCT<
779
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
780
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
781
0
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16);
782
0
      break;
783
0
    }
784
0
    case Type::DCT256X128: {
785
0
      ReinterpretingDCT<
786
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
787
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
788
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16);
789
0
      break;
790
0
    }
791
12
    case Type::DCT128X256: {
792
12
      ReinterpretingDCT<
793
12
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
794
12
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
795
12
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32);
796
12
      break;
797
0
    }
798
9
    case Type::DCT256X256: {
799
9
      ReinterpretingDCT<
800
9
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
801
9
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
802
9
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32);
803
9
      break;
804
0
    }
805
17.6k
    case Type::DCT:
806
21.3k
    case Type::DCT2X2:
807
21.3k
    case Type::DCT4X4:
808
22.0k
    case Type::DCT4X8:
809
22.9k
    case Type::DCT8X4:
810
23.0k
    case Type::AFV0:
811
23.0k
    case Type::AFV1:
812
23.5k
    case Type::AFV2:
813
30.9k
    case Type::AFV3:
814
50.8k
    case Type::IDENTITY:
815
50.8k
      llf[0] = dc[0];
816
50.8k
      break;
817
124k
  };
818
124k
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SCALAR::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SCALAR::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
819
820
}  // namespace
821
// NOLINTNEXTLINE(google-readability-namespace-comments)
822
}  // namespace HWY_NAMESPACE
823
}  // namespace jxl
824
HWY_AFTER_NAMESPACE();
825
826
#endif  // LIB_JXL_DEC_TRANSFORMS_INL_H_