Coverage Report

Created: 2026-04-01 07:49

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/modular/transform/rct.cc
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/modular/transform/rct.h"
7
8
#include <cstddef>
9
#include <cstdint>
10
#include <utility>
11
12
#include "lib/jxl/base/data_parallel.h"
13
#include "lib/jxl/base/status.h"
14
#include "lib/jxl/modular/modular_image.h"
15
#include "lib/jxl/modular/transform/transform.h"
16
#undef HWY_TARGET_INCLUDE
17
#define HWY_TARGET_INCLUDE "lib/jxl/modular/transform/rct.cc"
18
#include <hwy/foreach_target.h>
19
#include <hwy/highway.h>
20
HWY_BEFORE_NAMESPACE();
21
namespace jxl {
22
namespace HWY_NAMESPACE {
23
24
// These templates are not found via ADL.
25
using hwy::HWY_NAMESPACE::Add;
26
using hwy::HWY_NAMESPACE::ShiftRight;
27
using hwy::HWY_NAMESPACE::Sub;
28
29
template <int transform_type>
30
void InvRCTRow(const pixel_type* in0, const pixel_type* in1,
31
               const pixel_type* in2, pixel_type* out0, pixel_type* out1,
32
360k
               pixel_type* out2, size_t w) {
33
360k
  static_assert(transform_type >= 0 && transform_type < 7,
34
360k
                "Invalid transform type");
35
360k
  int second = transform_type >> 1;
36
360k
  int third = transform_type & 1;
37
38
360k
  size_t x = 0;
39
360k
  const HWY_FULL(pixel_type) d;
40
360k
  const size_t N = Lanes(d);
41
4.01M
  for (; x + N - 1 < w; x += N) {
42
3.65M
    if (transform_type == 6) {
43
1.49M
      auto Y = Load(d, in0 + x);
44
1.49M
      auto Co = Load(d, in1 + x);
45
1.49M
      auto Cg = Load(d, in2 + x);
46
1.49M
      Y = Sub(Y, ShiftRight<1>(Cg));
47
1.49M
      auto G = Add(Cg, Y);
48
1.49M
      Y = Sub(Y, ShiftRight<1>(Co));
49
1.49M
      auto R = Add(Y, Co);
50
1.49M
      Store(R, d, out0 + x);
51
1.49M
      Store(G, d, out1 + x);
52
1.49M
      Store(Y, d, out2 + x);
53
2.16M
    } else {
54
2.16M
      auto First = Load(d, in0 + x);
55
2.16M
      auto Second = Load(d, in1 + x);
56
2.16M
      auto Third = Load(d, in2 + x);
57
2.16M
      if (third) Third = Add(Third, First);
58
2.16M
      if (second == 1) {
59
1.57M
        Second = Add(Second, First);
60
1.57M
      } else if (second == 2) {
61
522k
        Second = Add(Second, ShiftRight<1>(Add(First, Third)));
62
522k
      }
63
2.16M
      Store(First, d, out0 + x);
64
2.16M
      Store(Second, d, out1 + x);
65
2.16M
      Store(Third, d, out2 + x);
66
2.16M
    }
67
3.65M
  }
68
1.03M
  for (; x < w; x++) {
69
673k
    if (transform_type == 6) {
70
577k
      pixel_type Y = in0[x];
71
577k
      pixel_type Co = in1[x];
72
577k
      pixel_type Cg = in2[x];
73
577k
      pixel_type tmp = PixelAdd(Y, -(Cg >> 1));
74
577k
      pixel_type G = PixelAdd(Cg, tmp);
75
577k
      pixel_type B = PixelAdd(tmp, -(Co >> 1));
76
577k
      pixel_type R = PixelAdd(B, Co);
77
577k
      out0[x] = R;
78
577k
      out1[x] = G;
79
577k
      out2[x] = B;
80
577k
    } else {
81
96.1k
      pixel_type First = in0[x];
82
96.1k
      pixel_type Second = in1[x];
83
96.1k
      pixel_type Third = in2[x];
84
96.1k
      if (third) Third = PixelAdd(Third, First);
85
96.1k
      if (second == 1) {
86
58.7k
        Second = PixelAdd(Second, First);
87
58.7k
      } else if (second == 2) {
88
26.6k
        Second = PixelAdd(Second, (PixelAdd(First, Third) >> 1));
89
26.6k
      }
90
96.1k
      out0[x] = First;
91
96.1k
      out1[x] = Second;
92
96.1k
      out2[x] = Third;
93
96.1k
    }
94
673k
  }
95
360k
}
Unexecuted instantiation: void jxl::N_SSE4::InvRCTRow<0>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Unexecuted instantiation: void jxl::N_SSE4::InvRCTRow<1>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Unexecuted instantiation: void jxl::N_SSE4::InvRCTRow<2>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Unexecuted instantiation: void jxl::N_SSE4::InvRCTRow<3>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Unexecuted instantiation: void jxl::N_SSE4::InvRCTRow<4>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Unexecuted instantiation: void jxl::N_SSE4::InvRCTRow<5>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Unexecuted instantiation: void jxl::N_SSE4::InvRCTRow<6>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Unexecuted instantiation: void jxl::N_AVX2::InvRCTRow<0>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
void jxl::N_AVX2::InvRCTRow<1>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Line
Count
Source
32
5.76k
               pixel_type* out2, size_t w) {
33
5.76k
  static_assert(transform_type >= 0 && transform_type < 7,
34
5.76k
                "Invalid transform type");
35
5.76k
  int second = transform_type >> 1;
36
5.76k
  int third = transform_type & 1;
37
38
5.76k
  size_t x = 0;
39
5.76k
  const HWY_FULL(pixel_type) d;
40
5.76k
  const size_t N = Lanes(d);
41
72.7k
  for (; x + N - 1 < w; x += N) {
42
66.9k
    if (transform_type == 6) {
43
0
      auto Y = Load(d, in0 + x);
44
0
      auto Co = Load(d, in1 + x);
45
0
      auto Cg = Load(d, in2 + x);
46
0
      Y = Sub(Y, ShiftRight<1>(Cg));
47
0
      auto G = Add(Cg, Y);
48
0
      Y = Sub(Y, ShiftRight<1>(Co));
49
0
      auto R = Add(Y, Co);
50
0
      Store(R, d, out0 + x);
51
0
      Store(G, d, out1 + x);
52
0
      Store(Y, d, out2 + x);
53
66.9k
    } else {
54
66.9k
      auto First = Load(d, in0 + x);
55
66.9k
      auto Second = Load(d, in1 + x);
56
66.9k
      auto Third = Load(d, in2 + x);
57
66.9k
      if (third) Third = Add(Third, First);
58
66.9k
      if (second == 1) {
59
0
        Second = Add(Second, First);
60
66.9k
      } else if (second == 2) {
61
0
        Second = Add(Second, ShiftRight<1>(Add(First, Third)));
62
0
      }
63
66.9k
      Store(First, d, out0 + x);
64
66.9k
      Store(Second, d, out1 + x);
65
66.9k
      Store(Third, d, out2 + x);
66
66.9k
    }
67
66.9k
  }
68
16.5k
  for (; x < w; x++) {
69
10.7k
    if (transform_type == 6) {
70
0
      pixel_type Y = in0[x];
71
0
      pixel_type Co = in1[x];
72
0
      pixel_type Cg = in2[x];
73
0
      pixel_type tmp = PixelAdd(Y, -(Cg >> 1));
74
0
      pixel_type G = PixelAdd(Cg, tmp);
75
0
      pixel_type B = PixelAdd(tmp, -(Co >> 1));
76
0
      pixel_type R = PixelAdd(B, Co);
77
0
      out0[x] = R;
78
0
      out1[x] = G;
79
0
      out2[x] = B;
80
10.7k
    } else {
81
10.7k
      pixel_type First = in0[x];
82
10.7k
      pixel_type Second = in1[x];
83
10.7k
      pixel_type Third = in2[x];
84
10.7k
      if (third) Third = PixelAdd(Third, First);
85
10.7k
      if (second == 1) {
86
0
        Second = PixelAdd(Second, First);
87
10.7k
      } else if (second == 2) {
88
0
        Second = PixelAdd(Second, (PixelAdd(First, Third) >> 1));
89
0
      }
90
10.7k
      out0[x] = First;
91
10.7k
      out1[x] = Second;
92
10.7k
      out2[x] = Third;
93
10.7k
    }
94
10.7k
  }
95
5.76k
}
void jxl::N_AVX2::InvRCTRow<2>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Line
Count
Source
32
5.17k
               pixel_type* out2, size_t w) {
33
5.17k
  static_assert(transform_type >= 0 && transform_type < 7,
34
5.17k
                "Invalid transform type");
35
5.17k
  int second = transform_type >> 1;
36
5.17k
  int third = transform_type & 1;
37
38
5.17k
  size_t x = 0;
39
5.17k
  const HWY_FULL(pixel_type) d;
40
5.17k
  const size_t N = Lanes(d);
41
90.6k
  for (; x + N - 1 < w; x += N) {
42
85.4k
    if (transform_type == 6) {
43
0
      auto Y = Load(d, in0 + x);
44
0
      auto Co = Load(d, in1 + x);
45
0
      auto Cg = Load(d, in2 + x);
46
0
      Y = Sub(Y, ShiftRight<1>(Cg));
47
0
      auto G = Add(Cg, Y);
48
0
      Y = Sub(Y, ShiftRight<1>(Co));
49
0
      auto R = Add(Y, Co);
50
0
      Store(R, d, out0 + x);
51
0
      Store(G, d, out1 + x);
52
0
      Store(Y, d, out2 + x);
53
85.4k
    } else {
54
85.4k
      auto First = Load(d, in0 + x);
55
85.4k
      auto Second = Load(d, in1 + x);
56
85.4k
      auto Third = Load(d, in2 + x);
57
85.4k
      if (third) Third = Add(Third, First);
58
85.4k
      if (second == 1) {
59
85.4k
        Second = Add(Second, First);
60
85.4k
      } else if (second == 2) {
61
0
        Second = Add(Second, ShiftRight<1>(Add(First, Third)));
62
0
      }
63
85.4k
      Store(First, d, out0 + x);
64
85.4k
      Store(Second, d, out1 + x);
65
85.4k
      Store(Third, d, out2 + x);
66
85.4k
    }
67
85.4k
  }
68
10.6k
  for (; x < w; x++) {
69
5.52k
    if (transform_type == 6) {
70
0
      pixel_type Y = in0[x];
71
0
      pixel_type Co = in1[x];
72
0
      pixel_type Cg = in2[x];
73
0
      pixel_type tmp = PixelAdd(Y, -(Cg >> 1));
74
0
      pixel_type G = PixelAdd(Cg, tmp);
75
0
      pixel_type B = PixelAdd(tmp, -(Co >> 1));
76
0
      pixel_type R = PixelAdd(B, Co);
77
0
      out0[x] = R;
78
0
      out1[x] = G;
79
0
      out2[x] = B;
80
5.52k
    } else {
81
5.52k
      pixel_type First = in0[x];
82
5.52k
      pixel_type Second = in1[x];
83
5.52k
      pixel_type Third = in2[x];
84
5.52k
      if (third) Third = PixelAdd(Third, First);
85
5.52k
      if (second == 1) {
86
5.52k
        Second = PixelAdd(Second, First);
87
5.52k
      } else if (second == 2) {
88
0
        Second = PixelAdd(Second, (PixelAdd(First, Third) >> 1));
89
0
      }
90
5.52k
      out0[x] = First;
91
5.52k
      out1[x] = Second;
92
5.52k
      out2[x] = Third;
93
5.52k
    }
94
5.52k
  }
95
5.17k
}
void jxl::N_AVX2::InvRCTRow<3>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Line
Count
Source
32
65.3k
               pixel_type* out2, size_t w) {
33
65.3k
  static_assert(transform_type >= 0 && transform_type < 7,
34
65.3k
                "Invalid transform type");
35
65.3k
  int second = transform_type >> 1;
36
65.3k
  int third = transform_type & 1;
37
38
65.3k
  size_t x = 0;
39
65.3k
  const HWY_FULL(pixel_type) d;
40
65.3k
  const size_t N = Lanes(d);
41
1.55M
  for (; x + N - 1 < w; x += N) {
42
1.48M
    if (transform_type == 6) {
43
0
      auto Y = Load(d, in0 + x);
44
0
      auto Co = Load(d, in1 + x);
45
0
      auto Cg = Load(d, in2 + x);
46
0
      Y = Sub(Y, ShiftRight<1>(Cg));
47
0
      auto G = Add(Cg, Y);
48
0
      Y = Sub(Y, ShiftRight<1>(Co));
49
0
      auto R = Add(Y, Co);
50
0
      Store(R, d, out0 + x);
51
0
      Store(G, d, out1 + x);
52
0
      Store(Y, d, out2 + x);
53
1.48M
    } else {
54
1.48M
      auto First = Load(d, in0 + x);
55
1.48M
      auto Second = Load(d, in1 + x);
56
1.48M
      auto Third = Load(d, in2 + x);
57
1.48M
      if (third) Third = Add(Third, First);
58
1.48M
      if (second == 1) {
59
1.48M
        Second = Add(Second, First);
60
1.48M
      } else if (second == 2) {
61
0
        Second = Add(Second, ShiftRight<1>(Add(First, Third)));
62
0
      }
63
1.48M
      Store(First, d, out0 + x);
64
1.48M
      Store(Second, d, out1 + x);
65
1.48M
      Store(Third, d, out2 + x);
66
1.48M
    }
67
1.48M
  }
68
118k
  for (; x < w; x++) {
69
53.2k
    if (transform_type == 6) {
70
0
      pixel_type Y = in0[x];
71
0
      pixel_type Co = in1[x];
72
0
      pixel_type Cg = in2[x];
73
0
      pixel_type tmp = PixelAdd(Y, -(Cg >> 1));
74
0
      pixel_type G = PixelAdd(Cg, tmp);
75
0
      pixel_type B = PixelAdd(tmp, -(Co >> 1));
76
0
      pixel_type R = PixelAdd(B, Co);
77
0
      out0[x] = R;
78
0
      out1[x] = G;
79
0
      out2[x] = B;
80
53.2k
    } else {
81
53.2k
      pixel_type First = in0[x];
82
53.2k
      pixel_type Second = in1[x];
83
53.2k
      pixel_type Third = in2[x];
84
53.2k
      if (third) Third = PixelAdd(Third, First);
85
53.2k
      if (second == 1) {
86
53.2k
        Second = PixelAdd(Second, First);
87
53.2k
      } else if (second == 2) {
88
0
        Second = PixelAdd(Second, (PixelAdd(First, Third) >> 1));
89
0
      }
90
53.2k
      out0[x] = First;
91
53.2k
      out1[x] = Second;
92
53.2k
      out2[x] = Third;
93
53.2k
    }
94
53.2k
  }
95
65.3k
}
void jxl::N_AVX2::InvRCTRow<4>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Line
Count
Source
32
6.88k
               pixel_type* out2, size_t w) {
33
6.88k
  static_assert(transform_type >= 0 && transform_type < 7,
34
6.88k
                "Invalid transform type");
35
6.88k
  int second = transform_type >> 1;
36
6.88k
  int third = transform_type & 1;
37
38
6.88k
  size_t x = 0;
39
6.88k
  const HWY_FULL(pixel_type) d;
40
6.88k
  const size_t N = Lanes(d);
41
206k
  for (; x + N - 1 < w; x += N) {
42
200k
    if (transform_type == 6) {
43
0
      auto Y = Load(d, in0 + x);
44
0
      auto Co = Load(d, in1 + x);
45
0
      auto Cg = Load(d, in2 + x);
46
0
      Y = Sub(Y, ShiftRight<1>(Cg));
47
0
      auto G = Add(Cg, Y);
48
0
      Y = Sub(Y, ShiftRight<1>(Co));
49
0
      auto R = Add(Y, Co);
50
0
      Store(R, d, out0 + x);
51
0
      Store(G, d, out1 + x);
52
0
      Store(Y, d, out2 + x);
53
200k
    } else {
54
200k
      auto First = Load(d, in0 + x);
55
200k
      auto Second = Load(d, in1 + x);
56
200k
      auto Third = Load(d, in2 + x);
57
200k
      if (third) Third = Add(Third, First);
58
200k
      if (second == 1) {
59
0
        Second = Add(Second, First);
60
200k
      } else if (second == 2) {
61
200k
        Second = Add(Second, ShiftRight<1>(Add(First, Third)));
62
200k
      }
63
200k
      Store(First, d, out0 + x);
64
200k
      Store(Second, d, out1 + x);
65
200k
      Store(Third, d, out2 + x);
66
200k
    }
67
200k
  }
68
21.5k
  for (; x < w; x++) {
69
14.6k
    if (transform_type == 6) {
70
0
      pixel_type Y = in0[x];
71
0
      pixel_type Co = in1[x];
72
0
      pixel_type Cg = in2[x];
73
0
      pixel_type tmp = PixelAdd(Y, -(Cg >> 1));
74
0
      pixel_type G = PixelAdd(Cg, tmp);
75
0
      pixel_type B = PixelAdd(tmp, -(Co >> 1));
76
0
      pixel_type R = PixelAdd(B, Co);
77
0
      out0[x] = R;
78
0
      out1[x] = G;
79
0
      out2[x] = B;
80
14.6k
    } else {
81
14.6k
      pixel_type First = in0[x];
82
14.6k
      pixel_type Second = in1[x];
83
14.6k
      pixel_type Third = in2[x];
84
14.6k
      if (third) Third = PixelAdd(Third, First);
85
14.6k
      if (second == 1) {
86
0
        Second = PixelAdd(Second, First);
87
14.6k
      } else if (second == 2) {
88
14.6k
        Second = PixelAdd(Second, (PixelAdd(First, Third) >> 1));
89
14.6k
      }
90
14.6k
      out0[x] = First;
91
14.6k
      out1[x] = Second;
92
14.6k
      out2[x] = Third;
93
14.6k
    }
94
14.6k
  }
95
6.88k
}
void jxl::N_AVX2::InvRCTRow<5>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Line
Count
Source
32
20.1k
               pixel_type* out2, size_t w) {
33
20.1k
  static_assert(transform_type >= 0 && transform_type < 7,
34
20.1k
                "Invalid transform type");
35
20.1k
  int second = transform_type >> 1;
36
20.1k
  int third = transform_type & 1;
37
38
20.1k
  size_t x = 0;
39
20.1k
  const HWY_FULL(pixel_type) d;
40
20.1k
  const size_t N = Lanes(d);
41
342k
  for (; x + N - 1 < w; x += N) {
42
322k
    if (transform_type == 6) {
43
0
      auto Y = Load(d, in0 + x);
44
0
      auto Co = Load(d, in1 + x);
45
0
      auto Cg = Load(d, in2 + x);
46
0
      Y = Sub(Y, ShiftRight<1>(Cg));
47
0
      auto G = Add(Cg, Y);
48
0
      Y = Sub(Y, ShiftRight<1>(Co));
49
0
      auto R = Add(Y, Co);
50
0
      Store(R, d, out0 + x);
51
0
      Store(G, d, out1 + x);
52
0
      Store(Y, d, out2 + x);
53
322k
    } else {
54
322k
      auto First = Load(d, in0 + x);
55
322k
      auto Second = Load(d, in1 + x);
56
322k
      auto Third = Load(d, in2 + x);
57
322k
      if (third) Third = Add(Third, First);
58
322k
      if (second == 1) {
59
0
        Second = Add(Second, First);
60
322k
      } else if (second == 2) {
61
322k
        Second = Add(Second, ShiftRight<1>(Add(First, Third)));
62
322k
      }
63
322k
      Store(First, d, out0 + x);
64
322k
      Store(Second, d, out1 + x);
65
322k
      Store(Third, d, out2 + x);
66
322k
    }
67
322k
  }
68
32.1k
  for (; x < w; x++) {
69
11.9k
    if (transform_type == 6) {
70
0
      pixel_type Y = in0[x];
71
0
      pixel_type Co = in1[x];
72
0
      pixel_type Cg = in2[x];
73
0
      pixel_type tmp = PixelAdd(Y, -(Cg >> 1));
74
0
      pixel_type G = PixelAdd(Cg, tmp);
75
0
      pixel_type B = PixelAdd(tmp, -(Co >> 1));
76
0
      pixel_type R = PixelAdd(B, Co);
77
0
      out0[x] = R;
78
0
      out1[x] = G;
79
0
      out2[x] = B;
80
11.9k
    } else {
81
11.9k
      pixel_type First = in0[x];
82
11.9k
      pixel_type Second = in1[x];
83
11.9k
      pixel_type Third = in2[x];
84
11.9k
      if (third) Third = PixelAdd(Third, First);
85
11.9k
      if (second == 1) {
86
0
        Second = PixelAdd(Second, First);
87
11.9k
      } else if (second == 2) {
88
11.9k
        Second = PixelAdd(Second, (PixelAdd(First, Third) >> 1));
89
11.9k
      }
90
11.9k
      out0[x] = First;
91
11.9k
      out1[x] = Second;
92
11.9k
      out2[x] = Third;
93
11.9k
    }
94
11.9k
  }
95
20.1k
}
void jxl::N_AVX2::InvRCTRow<6>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Line
Count
Source
32
256k
               pixel_type* out2, size_t w) {
33
256k
  static_assert(transform_type >= 0 && transform_type < 7,
34
256k
                "Invalid transform type");
35
256k
  int second = transform_type >> 1;
36
256k
  int third = transform_type & 1;
37
38
256k
  size_t x = 0;
39
256k
  const HWY_FULL(pixel_type) d;
40
256k
  const size_t N = Lanes(d);
41
1.75M
  for (; x + N - 1 < w; x += N) {
42
1.49M
    if (transform_type == 6) {
43
1.49M
      auto Y = Load(d, in0 + x);
44
1.49M
      auto Co = Load(d, in1 + x);
45
1.49M
      auto Cg = Load(d, in2 + x);
46
1.49M
      Y = Sub(Y, ShiftRight<1>(Cg));
47
1.49M
      auto G = Add(Cg, Y);
48
1.49M
      Y = Sub(Y, ShiftRight<1>(Co));
49
1.49M
      auto R = Add(Y, Co);
50
1.49M
      Store(R, d, out0 + x);
51
1.49M
      Store(G, d, out1 + x);
52
1.49M
      Store(Y, d, out2 + x);
53
1.49M
    } else {
54
0
      auto First = Load(d, in0 + x);
55
0
      auto Second = Load(d, in1 + x);
56
0
      auto Third = Load(d, in2 + x);
57
0
      if (third) Third = Add(Third, First);
58
0
      if (second == 1) {
59
0
        Second = Add(Second, First);
60
0
      } else if (second == 2) {
61
0
        Second = Add(Second, ShiftRight<1>(Add(First, Third)));
62
0
      }
63
0
      Store(First, d, out0 + x);
64
0
      Store(Second, d, out1 + x);
65
0
      Store(Third, d, out2 + x);
66
0
    }
67
1.49M
  }
68
834k
  for (; x < w; x++) {
69
577k
    if (transform_type == 6) {
70
577k
      pixel_type Y = in0[x];
71
577k
      pixel_type Co = in1[x];
72
577k
      pixel_type Cg = in2[x];
73
577k
      pixel_type tmp = PixelAdd(Y, -(Cg >> 1));
74
577k
      pixel_type G = PixelAdd(Cg, tmp);
75
577k
      pixel_type B = PixelAdd(tmp, -(Co >> 1));
76
577k
      pixel_type R = PixelAdd(B, Co);
77
577k
      out0[x] = R;
78
577k
      out1[x] = G;
79
577k
      out2[x] = B;
80
577k
    } else {
81
0
      pixel_type First = in0[x];
82
0
      pixel_type Second = in1[x];
83
0
      pixel_type Third = in2[x];
84
0
      if (third) Third = PixelAdd(Third, First);
85
0
      if (second == 1) {
86
0
        Second = PixelAdd(Second, First);
87
0
      } else if (second == 2) {
88
0
        Second = PixelAdd(Second, (PixelAdd(First, Third) >> 1));
89
0
      }
90
0
      out0[x] = First;
91
0
      out1[x] = Second;
92
0
      out2[x] = Third;
93
0
    }
94
577k
  }
95
256k
}
Unexecuted instantiation: void jxl::N_AVX3::InvRCTRow<0>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Unexecuted instantiation: void jxl::N_AVX3::InvRCTRow<1>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Unexecuted instantiation: void jxl::N_AVX3::InvRCTRow<2>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Unexecuted instantiation: void jxl::N_AVX3::InvRCTRow<3>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Unexecuted instantiation: void jxl::N_AVX3::InvRCTRow<4>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Unexecuted instantiation: void jxl::N_AVX3::InvRCTRow<5>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Unexecuted instantiation: void jxl::N_AVX3::InvRCTRow<6>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Unexecuted instantiation: void jxl::N_AVX3_ZEN4::InvRCTRow<0>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Unexecuted instantiation: void jxl::N_AVX3_ZEN4::InvRCTRow<1>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Unexecuted instantiation: void jxl::N_AVX3_ZEN4::InvRCTRow<2>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Unexecuted instantiation: void jxl::N_AVX3_ZEN4::InvRCTRow<3>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Unexecuted instantiation: void jxl::N_AVX3_ZEN4::InvRCTRow<4>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Unexecuted instantiation: void jxl::N_AVX3_ZEN4::InvRCTRow<5>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Unexecuted instantiation: void jxl::N_AVX3_ZEN4::InvRCTRow<6>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Unexecuted instantiation: void jxl::N_AVX3_SPR::InvRCTRow<0>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Unexecuted instantiation: void jxl::N_AVX3_SPR::InvRCTRow<1>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Unexecuted instantiation: void jxl::N_AVX3_SPR::InvRCTRow<2>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Unexecuted instantiation: void jxl::N_AVX3_SPR::InvRCTRow<3>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Unexecuted instantiation: void jxl::N_AVX3_SPR::InvRCTRow<4>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Unexecuted instantiation: void jxl::N_AVX3_SPR::InvRCTRow<5>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Unexecuted instantiation: void jxl::N_AVX3_SPR::InvRCTRow<6>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Unexecuted instantiation: void jxl::N_SSE2::InvRCTRow<0>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Unexecuted instantiation: void jxl::N_SSE2::InvRCTRow<1>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Unexecuted instantiation: void jxl::N_SSE2::InvRCTRow<2>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Unexecuted instantiation: void jxl::N_SSE2::InvRCTRow<3>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Unexecuted instantiation: void jxl::N_SSE2::InvRCTRow<4>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Unexecuted instantiation: void jxl::N_SSE2::InvRCTRow<5>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
Unexecuted instantiation: void jxl::N_SSE2::InvRCTRow<6>(int const*, int const*, int const*, int*, int*, int*, unsigned long)
96
97
11.6k
Status InvRCT(Image& input, size_t begin_c, size_t rct_type, ThreadPool* pool) {
98
11.6k
  JXL_RETURN_IF_ERROR(CheckEqualChannels(input, begin_c, begin_c + 2));
99
11.6k
  size_t m = begin_c;
100
11.6k
  Channel& c0 = input.channel[m + 0];
101
11.6k
  size_t w = c0.w;
102
11.6k
  size_t h = c0.h;
103
11.6k
  if (rct_type == 0) {  // noop
104
5.19k
    return true;
105
5.19k
  }
106
  // Permutation: 0=RGB, 1=GBR, 2=BRG, 3=RBG, 4=GRB, 5=BGR
107
6.43k
  int permutation = rct_type / 7;
108
6.43k
  JXL_ENSURE(permutation < 6);
109
  // 0-5 values have the low bit corresponding to Third and the high bits
110
  // corresponding to Second. 6 corresponds to YCoCg.
111
  //
112
  // Second: 0=nop, 1=SubtractFirst, 2=SubtractAvgFirstThird
113
  //
114
  // Third: 0=nop, 1=SubtractFirst
115
6.43k
  int custom = rct_type % 7;
116
  // Special case: permute-only. Swap channels around.
117
6.43k
  if (custom == 0) {
118
1.83k
    Channel ch0 = std::move(input.channel[m]);
119
1.83k
    Channel ch1 = std::move(input.channel[m + 1]);
120
1.83k
    Channel ch2 = std::move(input.channel[m + 2]);
121
1.83k
    input.channel[m + (permutation % 3)] = std::move(ch0);
122
1.83k
    input.channel[m + ((permutation + 1 + permutation / 3) % 3)] =
123
1.83k
        std::move(ch1);
124
1.83k
    input.channel[m + ((permutation + 2 - permutation / 3) % 3)] =
125
1.83k
        std::move(ch2);
126
1.83k
    return true;
127
1.83k
  }
128
4.60k
  constexpr decltype(&InvRCTRow<0>) inv_rct_row[] = {
129
4.60k
      InvRCTRow<0>, InvRCTRow<1>, InvRCTRow<2>, InvRCTRow<3>,
130
4.60k
      InvRCTRow<4>, InvRCTRow<5>, InvRCTRow<6>};
131
4.60k
  const auto process_row = [&](const uint32_t task,
132
360k
                               size_t /* thread */) -> Status {
133
360k
    const size_t y = task;
134
360k
    const pixel_type* in0 = input.channel[m].Row(y);
135
360k
    const pixel_type* in1 = input.channel[m + 1].Row(y);
136
360k
    const pixel_type* in2 = input.channel[m + 2].Row(y);
137
360k
    pixel_type* out0 = input.channel[m + (permutation % 3)].Row(y);
138
360k
    pixel_type* out1 =
139
360k
        input.channel[m + ((permutation + 1 + permutation / 3) % 3)].Row(y);
140
360k
    pixel_type* out2 =
141
360k
        input.channel[m + ((permutation + 2 - permutation / 3) % 3)].Row(y);
142
360k
    inv_rct_row[custom](in0, in1, in2, out0, out1, out2, w);
143
360k
    return true;
144
360k
  };
Unexecuted instantiation: rct.cc:jxl::N_SSE4::InvRCT(jxl::Image&, unsigned long, unsigned long, jxl::ThreadPool*)::$_0::operator()(unsigned int, unsigned long) const
rct.cc:jxl::N_AVX2::InvRCT(jxl::Image&, unsigned long, unsigned long, jxl::ThreadPool*)::$_0::operator()(unsigned int, unsigned long) const
Line
Count
Source
132
360k
                               size_t /* thread */) -> Status {
133
360k
    const size_t y = task;
134
360k
    const pixel_type* in0 = input.channel[m].Row(y);
135
360k
    const pixel_type* in1 = input.channel[m + 1].Row(y);
136
360k
    const pixel_type* in2 = input.channel[m + 2].Row(y);
137
360k
    pixel_type* out0 = input.channel[m + (permutation % 3)].Row(y);
138
360k
    pixel_type* out1 =
139
360k
        input.channel[m + ((permutation + 1 + permutation / 3) % 3)].Row(y);
140
360k
    pixel_type* out2 =
141
360k
        input.channel[m + ((permutation + 2 - permutation / 3) % 3)].Row(y);
142
360k
    inv_rct_row[custom](in0, in1, in2, out0, out1, out2, w);
143
360k
    return true;
144
360k
  };
Unexecuted instantiation: rct.cc:jxl::N_AVX3::InvRCT(jxl::Image&, unsigned long, unsigned long, jxl::ThreadPool*)::$_0::operator()(unsigned int, unsigned long) const
Unexecuted instantiation: rct.cc:jxl::N_AVX3_ZEN4::InvRCT(jxl::Image&, unsigned long, unsigned long, jxl::ThreadPool*)::$_0::operator()(unsigned int, unsigned long) const
Unexecuted instantiation: rct.cc:jxl::N_AVX3_SPR::InvRCT(jxl::Image&, unsigned long, unsigned long, jxl::ThreadPool*)::$_0::operator()(unsigned int, unsigned long) const
Unexecuted instantiation: rct.cc:jxl::N_SSE2::InvRCT(jxl::Image&, unsigned long, unsigned long, jxl::ThreadPool*)::$_0::operator()(unsigned int, unsigned long) const
145
4.60k
  JXL_RETURN_IF_ERROR(
146
4.60k
      RunOnPool(pool, 0, h, ThreadPool::NoInit, process_row, "InvRCT"));
147
4.60k
  return true;
148
4.60k
}
Unexecuted instantiation: jxl::N_SSE4::InvRCT(jxl::Image&, unsigned long, unsigned long, jxl::ThreadPool*)
jxl::N_AVX2::InvRCT(jxl::Image&, unsigned long, unsigned long, jxl::ThreadPool*)
Line
Count
Source
97
11.6k
Status InvRCT(Image& input, size_t begin_c, size_t rct_type, ThreadPool* pool) {
98
11.6k
  JXL_RETURN_IF_ERROR(CheckEqualChannels(input, begin_c, begin_c + 2));
99
11.6k
  size_t m = begin_c;
100
11.6k
  Channel& c0 = input.channel[m + 0];
101
11.6k
  size_t w = c0.w;
102
11.6k
  size_t h = c0.h;
103
11.6k
  if (rct_type == 0) {  // noop
104
5.19k
    return true;
105
5.19k
  }
106
  // Permutation: 0=RGB, 1=GBR, 2=BRG, 3=RBG, 4=GRB, 5=BGR
107
6.43k
  int permutation = rct_type / 7;
108
6.43k
  JXL_ENSURE(permutation < 6);
109
  // 0-5 values have the low bit corresponding to Third and the high bits
110
  // corresponding to Second. 6 corresponds to YCoCg.
111
  //
112
  // Second: 0=nop, 1=SubtractFirst, 2=SubtractAvgFirstThird
113
  //
114
  // Third: 0=nop, 1=SubtractFirst
115
6.43k
  int custom = rct_type % 7;
116
  // Special case: permute-only. Swap channels around.
117
6.43k
  if (custom == 0) {
118
1.83k
    Channel ch0 = std::move(input.channel[m]);
119
1.83k
    Channel ch1 = std::move(input.channel[m + 1]);
120
1.83k
    Channel ch2 = std::move(input.channel[m + 2]);
121
1.83k
    input.channel[m + (permutation % 3)] = std::move(ch0);
122
1.83k
    input.channel[m + ((permutation + 1 + permutation / 3) % 3)] =
123
1.83k
        std::move(ch1);
124
1.83k
    input.channel[m + ((permutation + 2 - permutation / 3) % 3)] =
125
1.83k
        std::move(ch2);
126
1.83k
    return true;
127
1.83k
  }
128
4.60k
  constexpr decltype(&InvRCTRow<0>) inv_rct_row[] = {
129
4.60k
      InvRCTRow<0>, InvRCTRow<1>, InvRCTRow<2>, InvRCTRow<3>,
130
4.60k
      InvRCTRow<4>, InvRCTRow<5>, InvRCTRow<6>};
131
4.60k
  const auto process_row = [&](const uint32_t task,
132
4.60k
                               size_t /* thread */) -> Status {
133
4.60k
    const size_t y = task;
134
4.60k
    const pixel_type* in0 = input.channel[m].Row(y);
135
4.60k
    const pixel_type* in1 = input.channel[m + 1].Row(y);
136
4.60k
    const pixel_type* in2 = input.channel[m + 2].Row(y);
137
4.60k
    pixel_type* out0 = input.channel[m + (permutation % 3)].Row(y);
138
4.60k
    pixel_type* out1 =
139
4.60k
        input.channel[m + ((permutation + 1 + permutation / 3) % 3)].Row(y);
140
4.60k
    pixel_type* out2 =
141
4.60k
        input.channel[m + ((permutation + 2 - permutation / 3) % 3)].Row(y);
142
4.60k
    inv_rct_row[custom](in0, in1, in2, out0, out1, out2, w);
143
4.60k
    return true;
144
4.60k
  };
145
4.60k
  JXL_RETURN_IF_ERROR(
146
4.60k
      RunOnPool(pool, 0, h, ThreadPool::NoInit, process_row, "InvRCT"));
147
4.60k
  return true;
148
4.60k
}
Unexecuted instantiation: jxl::N_AVX3::InvRCT(jxl::Image&, unsigned long, unsigned long, jxl::ThreadPool*)
Unexecuted instantiation: jxl::N_AVX3_ZEN4::InvRCT(jxl::Image&, unsigned long, unsigned long, jxl::ThreadPool*)
Unexecuted instantiation: jxl::N_AVX3_SPR::InvRCT(jxl::Image&, unsigned long, unsigned long, jxl::ThreadPool*)
Unexecuted instantiation: jxl::N_SSE2::InvRCT(jxl::Image&, unsigned long, unsigned long, jxl::ThreadPool*)
149
150
}  // namespace HWY_NAMESPACE
151
}  // namespace jxl
152
HWY_AFTER_NAMESPACE();
153
154
#if HWY_ONCE
155
namespace jxl {
156
157
HWY_EXPORT(InvRCT);
158
11.6k
Status InvRCT(Image& input, size_t begin_c, size_t rct_type, ThreadPool* pool) {
159
11.6k
  return HWY_DYNAMIC_DISPATCH(InvRCT)(input, begin_c, rct_type, pool);
160
11.6k
}
161
162
}  // namespace jxl
163
#endif