/src/libjxl/lib/jxl/modular/transform/rct.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | #include "lib/jxl/modular/transform/rct.h" |
7 | | |
8 | | #include <cstddef> |
9 | | #include <cstdint> |
10 | | #include <utility> |
11 | | |
12 | | #include "lib/jxl/base/data_parallel.h" |
13 | | #include "lib/jxl/base/status.h" |
14 | | #include "lib/jxl/modular/modular_image.h" |
15 | | #include "lib/jxl/modular/transform/transform.h" |
16 | | #undef HWY_TARGET_INCLUDE |
17 | | #define HWY_TARGET_INCLUDE "lib/jxl/modular/transform/rct.cc" |
18 | | #include <hwy/foreach_target.h> |
19 | | #include <hwy/highway.h> |
20 | | HWY_BEFORE_NAMESPACE(); |
21 | | namespace jxl { |
22 | | namespace HWY_NAMESPACE { |
23 | | |
24 | | // These templates are not found via ADL. |
25 | | using hwy::HWY_NAMESPACE::Add; |
26 | | using hwy::HWY_NAMESPACE::ShiftRight; |
27 | | using hwy::HWY_NAMESPACE::Sub; |
28 | | |
29 | | template <int transform_type> |
30 | | void InvRCTRow(const pixel_type* in0, const pixel_type* in1, |
31 | | const pixel_type* in2, pixel_type* out0, pixel_type* out1, |
32 | 53.4k | pixel_type* out2, size_t w) { |
33 | 53.4k | static_assert(transform_type >= 0 && transform_type < 7, |
34 | 53.4k | "Invalid transform type"); |
35 | 53.4k | int second = transform_type >> 1; |
36 | 53.4k | int third = transform_type & 1; |
37 | | |
38 | 53.4k | size_t x = 0; |
39 | 53.4k | const HWY_FULL(pixel_type) d; |
40 | 53.4k | const size_t N = Lanes(d); |
41 | 492k | for (; x + N - 1 < w; x += N) { |
42 | 439k | if (transform_type == 6) { |
43 | 300k | auto Y = Load(d, in0 + x); |
44 | 300k | auto Co = Load(d, in1 + x); |
45 | 300k | auto Cg = Load(d, in2 + x); |
46 | 300k | Y = Sub(Y, ShiftRight<1>(Cg)); |
47 | 300k | auto G = Add(Cg, Y); |
48 | 300k | Y = Sub(Y, ShiftRight<1>(Co)); |
49 | 300k | auto R = Add(Y, Co); |
50 | 300k | Store(R, d, out0 + x); |
51 | 300k | Store(G, d, out1 + x); |
52 | 300k | Store(Y, d, out2 + x); |
53 | 300k | } else { |
54 | 138k | auto First = Load(d, in0 + x); |
55 | 138k | auto Second = Load(d, in1 + x); |
56 | 138k | auto Third = Load(d, in2 + x); |
57 | 138k | if (third) Third = Add(Third, First); |
58 | 138k | if (second == 1) { |
59 | 126k | Second = Add(Second, First); |
60 | 126k | } else if (second == 2) { |
61 | 12.0k | Second = Add(Second, ShiftRight<1>(Add(First, Third))); |
62 | 12.0k | } |
63 | 138k | Store(First, d, out0 + x); |
64 | 138k | Store(Second, d, out1 + x); |
65 | 138k | Store(Third, d, out2 + x); |
66 | 138k | } |
67 | 439k | } |
68 | 113k | for (; x < w; x++) { |
69 | 60.0k | if (transform_type == 6) { |
70 | 43.5k | pixel_type Y = in0[x]; |
71 | 43.5k | pixel_type Co = in1[x]; |
72 | 43.5k | pixel_type Cg = in2[x]; |
73 | 43.5k | pixel_type tmp = PixelAdd(Y, -(Cg >> 1)); |
74 | 43.5k | pixel_type G = PixelAdd(Cg, tmp); |
75 | 43.5k | pixel_type B = PixelAdd(tmp, -(Co >> 1)); |
76 | 43.5k | pixel_type R = PixelAdd(B, Co); |
77 | 43.5k | out0[x] = R; |
78 | 43.5k | out1[x] = G; |
79 | 43.5k | out2[x] = B; |
80 | 43.5k | } else { |
81 | 16.4k | pixel_type First = in0[x]; |
82 | 16.4k | pixel_type Second = in1[x]; |
83 | 16.4k | pixel_type Third = in2[x]; |
84 | 16.4k | if (third) Third = PixelAdd(Third, First); |
85 | 16.4k | if (second == 1) { |
86 | 16.1k | Second = PixelAdd(Second, First); |
87 | 16.1k | } else if (second == 2) { |
88 | 391 | Second = PixelAdd(Second, (PixelAdd(First, Third) >> 1)); |
89 | 391 | } |
90 | 16.4k | out0[x] = First; |
91 | 16.4k | out1[x] = Second; |
92 | 16.4k | out2[x] = Third; |
93 | 16.4k | } |
94 | 60.0k | } |
95 | 53.4k | } Unexecuted instantiation: void jxl::N_SSE4::InvRCTRow<0>(int const*, int const*, int const*, int*, int*, int*, unsigned long) Unexecuted instantiation: void jxl::N_SSE4::InvRCTRow<1>(int const*, int const*, int const*, int*, int*, int*, unsigned long) Unexecuted instantiation: void jxl::N_SSE4::InvRCTRow<2>(int const*, int const*, int const*, int*, int*, int*, unsigned long) Unexecuted instantiation: void jxl::N_SSE4::InvRCTRow<3>(int const*, int const*, int const*, int*, int*, int*, unsigned long) Unexecuted instantiation: void jxl::N_SSE4::InvRCTRow<4>(int const*, int const*, int const*, int*, int*, int*, unsigned long) Unexecuted instantiation: void jxl::N_SSE4::InvRCTRow<5>(int const*, int const*, int const*, int*, int*, int*, unsigned long) Unexecuted instantiation: void jxl::N_SSE4::InvRCTRow<6>(int const*, int const*, int const*, int*, int*, int*, unsigned long) Unexecuted instantiation: void jxl::N_AVX2::InvRCTRow<0>(int const*, int const*, int const*, int*, int*, int*, unsigned long) Unexecuted instantiation: void jxl::N_AVX2::InvRCTRow<1>(int const*, int const*, int const*, int*, int*, int*, unsigned long) void jxl::N_AVX2::InvRCTRow<2>(int const*, int const*, int const*, int*, int*, int*, unsigned long) Line | Count | Source | 32 | 15.3k | pixel_type* out2, size_t w) { | 33 | 15.3k | static_assert(transform_type >= 0 && transform_type < 7, | 34 | 15.3k | "Invalid transform type"); | 35 | 15.3k | int second = transform_type >> 1; | 36 | 15.3k | int third = transform_type & 1; | 37 | | | 38 | 15.3k | size_t x = 0; | 39 | 15.3k | const HWY_FULL(pixel_type) d; | 40 | 15.3k | const size_t N = Lanes(d); | 41 | 141k | for (; x + N - 1 < w; x += N) { | 42 | 126k | if (transform_type == 6) { | 43 | 0 | auto Y = Load(d, in0 + x); | 44 | 0 | auto Co = Load(d, in1 + x); | 45 | 0 | auto Cg = Load(d, in2 + x); | 46 | 0 | Y = Sub(Y, ShiftRight<1>(Cg)); | 47 | 0 | auto G = Add(Cg, Y); | 48 | 0 | Y = Sub(Y, ShiftRight<1>(Co)); | 49 | 0 | auto R = Add(Y, Co); | 50 | 0 | Store(R, d, out0 + x); | 51 | 0 | Store(G, d, out1 + x); | 52 | 0 | Store(Y, d, out2 + x); | 53 | 126k | } else { | 54 | 126k | auto First = Load(d, in0 + x); | 55 | 126k | auto Second = Load(d, in1 + x); | 56 | 126k | auto Third = Load(d, in2 + x); | 57 | 126k | if (third) Third = Add(Third, First); | 58 | 126k | if (second == 1) { | 59 | 126k | Second = Add(Second, First); | 60 | 126k | } else if (second == 2) { | 61 | 0 | Second = Add(Second, ShiftRight<1>(Add(First, Third))); | 62 | 0 | } | 63 | 126k | Store(First, d, out0 + x); | 64 | 126k | Store(Second, d, out1 + x); | 65 | 126k | Store(Third, d, out2 + x); | 66 | 126k | } | 67 | 126k | } | 68 | 31.3k | for (; x < w; x++) { | 69 | 16.0k | if (transform_type == 6) { | 70 | 0 | pixel_type Y = in0[x]; | 71 | 0 | pixel_type Co = in1[x]; | 72 | 0 | pixel_type Cg = in2[x]; | 73 | 0 | pixel_type tmp = PixelAdd(Y, -(Cg >> 1)); | 74 | 0 | pixel_type G = PixelAdd(Cg, tmp); | 75 | 0 | pixel_type B = PixelAdd(tmp, -(Co >> 1)); | 76 | 0 | pixel_type R = PixelAdd(B, Co); | 77 | 0 | out0[x] = R; | 78 | 0 | out1[x] = G; | 79 | 0 | out2[x] = B; | 80 | 16.0k | } else { | 81 | 16.0k | pixel_type First = in0[x]; | 82 | 16.0k | pixel_type Second = in1[x]; | 83 | 16.0k | pixel_type Third = in2[x]; | 84 | 16.0k | if (third) Third = PixelAdd(Third, First); | 85 | 16.0k | if (second == 1) { | 86 | 16.0k | Second = PixelAdd(Second, First); | 87 | 16.0k | } else if (second == 2) { | 88 | 0 | Second = PixelAdd(Second, (PixelAdd(First, Third) >> 1)); | 89 | 0 | } | 90 | 16.0k | out0[x] = First; | 91 | 16.0k | out1[x] = Second; | 92 | 16.0k | out2[x] = Third; | 93 | 16.0k | } | 94 | 16.0k | } | 95 | 15.3k | } |
void jxl::N_AVX2::InvRCTRow<3>(int const*, int const*, int const*, int*, int*, int*, unsigned long) Line | Count | Source | 32 | 19 | pixel_type* out2, size_t w) { | 33 | 19 | static_assert(transform_type >= 0 && transform_type < 7, | 34 | 19 | "Invalid transform type"); | 35 | 19 | int second = transform_type >> 1; | 36 | 19 | int third = transform_type & 1; | 37 | | | 38 | 19 | size_t x = 0; | 39 | 19 | const HWY_FULL(pixel_type) d; | 40 | 19 | const size_t N = Lanes(d); | 41 | 38 | for (; x + N - 1 < w; x += N) { | 42 | 19 | if (transform_type == 6) { | 43 | 0 | auto Y = Load(d, in0 + x); | 44 | 0 | auto Co = Load(d, in1 + x); | 45 | 0 | auto Cg = Load(d, in2 + x); | 46 | 0 | Y = Sub(Y, ShiftRight<1>(Cg)); | 47 | 0 | auto G = Add(Cg, Y); | 48 | 0 | Y = Sub(Y, ShiftRight<1>(Co)); | 49 | 0 | auto R = Add(Y, Co); | 50 | 0 | Store(R, d, out0 + x); | 51 | 0 | Store(G, d, out1 + x); | 52 | 0 | Store(Y, d, out2 + x); | 53 | 19 | } else { | 54 | 19 | auto First = Load(d, in0 + x); | 55 | 19 | auto Second = Load(d, in1 + x); | 56 | 19 | auto Third = Load(d, in2 + x); | 57 | 19 | if (third) Third = Add(Third, First); | 58 | 19 | if (second == 1) { | 59 | 19 | Second = Add(Second, First); | 60 | 19 | } else if (second == 2) { | 61 | 0 | Second = Add(Second, ShiftRight<1>(Add(First, Third))); | 62 | 0 | } | 63 | 19 | Store(First, d, out0 + x); | 64 | 19 | Store(Second, d, out1 + x); | 65 | 19 | Store(Third, d, out2 + x); | 66 | 19 | } | 67 | 19 | } | 68 | 114 | for (; x < w; x++) { | 69 | 95 | if (transform_type == 6) { | 70 | 0 | pixel_type Y = in0[x]; | 71 | 0 | pixel_type Co = in1[x]; | 72 | 0 | pixel_type Cg = in2[x]; | 73 | 0 | pixel_type tmp = PixelAdd(Y, -(Cg >> 1)); | 74 | 0 | pixel_type G = PixelAdd(Cg, tmp); | 75 | 0 | pixel_type B = PixelAdd(tmp, -(Co >> 1)); | 76 | 0 | pixel_type R = PixelAdd(B, Co); | 77 | 0 | out0[x] = R; | 78 | 0 | out1[x] = G; | 79 | 0 | out2[x] = B; | 80 | 95 | } else { | 81 | 95 | pixel_type First = in0[x]; | 82 | 95 | pixel_type Second = in1[x]; | 83 | 95 | pixel_type Third = in2[x]; | 84 | 95 | if (third) Third = PixelAdd(Third, First); | 85 | 95 | if (second == 1) { | 86 | 95 | Second = PixelAdd(Second, First); | 87 | 95 | } else if (second == 2) { | 88 | 0 | Second = PixelAdd(Second, (PixelAdd(First, Third) >> 1)); | 89 | 0 | } | 90 | 95 | out0[x] = First; | 91 | 95 | out1[x] = Second; | 92 | 95 | out2[x] = Third; | 93 | 95 | } | 94 | 95 | } | 95 | 19 | } |
void jxl::N_AVX2::InvRCTRow<4>(int const*, int const*, int const*, int*, int*, int*, unsigned long) Line | Count | Source | 32 | 177 | pixel_type* out2, size_t w) { | 33 | 177 | static_assert(transform_type >= 0 && transform_type < 7, | 34 | 177 | "Invalid transform type"); | 35 | 177 | int second = transform_type >> 1; | 36 | 177 | int third = transform_type & 1; | 37 | | | 38 | 177 | size_t x = 0; | 39 | 177 | const HWY_FULL(pixel_type) d; | 40 | 177 | const size_t N = Lanes(d); | 41 | 353 | for (; x + N - 1 < w; x += N) { | 42 | 176 | if (transform_type == 6) { | 43 | 0 | auto Y = Load(d, in0 + x); | 44 | 0 | auto Co = Load(d, in1 + x); | 45 | 0 | auto Cg = Load(d, in2 + x); | 46 | 0 | Y = Sub(Y, ShiftRight<1>(Cg)); | 47 | 0 | auto G = Add(Cg, Y); | 48 | 0 | Y = Sub(Y, ShiftRight<1>(Co)); | 49 | 0 | auto R = Add(Y, Co); | 50 | 0 | Store(R, d, out0 + x); | 51 | 0 | Store(G, d, out1 + x); | 52 | 0 | Store(Y, d, out2 + x); | 53 | 176 | } else { | 54 | 176 | auto First = Load(d, in0 + x); | 55 | 176 | auto Second = Load(d, in1 + x); | 56 | 176 | auto Third = Load(d, in2 + x); | 57 | 176 | if (third) Third = Add(Third, First); | 58 | 176 | if (second == 1) { | 59 | 0 | Second = Add(Second, First); | 60 | 176 | } else if (second == 2) { | 61 | 176 | Second = Add(Second, ShiftRight<1>(Add(First, Third))); | 62 | 176 | } | 63 | 176 | Store(First, d, out0 + x); | 64 | 176 | Store(Second, d, out1 + x); | 65 | 176 | Store(Third, d, out2 + x); | 66 | 176 | } | 67 | 176 | } | 68 | 321 | for (; x < w; x++) { | 69 | 144 | if (transform_type == 6) { | 70 | 0 | pixel_type Y = in0[x]; | 71 | 0 | pixel_type Co = in1[x]; | 72 | 0 | pixel_type Cg = in2[x]; | 73 | 0 | pixel_type tmp = PixelAdd(Y, -(Cg >> 1)); | 74 | 0 | pixel_type G = PixelAdd(Cg, tmp); | 75 | 0 | pixel_type B = PixelAdd(tmp, -(Co >> 1)); | 76 | 0 | pixel_type R = PixelAdd(B, Co); | 77 | 0 | out0[x] = R; | 78 | 0 | out1[x] = G; | 79 | 0 | out2[x] = B; | 80 | 144 | } else { | 81 | 144 | pixel_type First = in0[x]; | 82 | 144 | pixel_type Second = in1[x]; | 83 | 144 | pixel_type Third = in2[x]; | 84 | 144 | if (third) Third = PixelAdd(Third, First); | 85 | 144 | if (second == 1) { | 86 | 0 | Second = PixelAdd(Second, First); | 87 | 144 | } else if (second == 2) { | 88 | 144 | Second = PixelAdd(Second, (PixelAdd(First, Third) >> 1)); | 89 | 144 | } | 90 | 144 | out0[x] = First; | 91 | 144 | out1[x] = Second; | 92 | 144 | out2[x] = Third; | 93 | 144 | } | 94 | 144 | } | 95 | 177 | } |
void jxl::N_AVX2::InvRCTRow<5>(int const*, int const*, int const*, int*, int*, int*, unsigned long) Line | Count | Source | 32 | 11.9k | pixel_type* out2, size_t w) { | 33 | 11.9k | static_assert(transform_type >= 0 && transform_type < 7, | 34 | 11.9k | "Invalid transform type"); | 35 | 11.9k | int second = transform_type >> 1; | 36 | 11.9k | int third = transform_type & 1; | 37 | | | 38 | 11.9k | size_t x = 0; | 39 | 11.9k | const HWY_FULL(pixel_type) d; | 40 | 11.9k | const size_t N = Lanes(d); | 41 | 23.8k | for (; x + N - 1 < w; x += N) { | 42 | 11.9k | if (transform_type == 6) { | 43 | 0 | auto Y = Load(d, in0 + x); | 44 | 0 | auto Co = Load(d, in1 + x); | 45 | 0 | auto Cg = Load(d, in2 + x); | 46 | 0 | Y = Sub(Y, ShiftRight<1>(Cg)); | 47 | 0 | auto G = Add(Cg, Y); | 48 | 0 | Y = Sub(Y, ShiftRight<1>(Co)); | 49 | 0 | auto R = Add(Y, Co); | 50 | 0 | Store(R, d, out0 + x); | 51 | 0 | Store(G, d, out1 + x); | 52 | 0 | Store(Y, d, out2 + x); | 53 | 11.9k | } else { | 54 | 11.9k | auto First = Load(d, in0 + x); | 55 | 11.9k | auto Second = Load(d, in1 + x); | 56 | 11.9k | auto Third = Load(d, in2 + x); | 57 | 11.9k | if (third) Third = Add(Third, First); | 58 | 11.9k | if (second == 1) { | 59 | 0 | Second = Add(Second, First); | 60 | 11.9k | } else if (second == 2) { | 61 | 11.9k | Second = Add(Second, ShiftRight<1>(Add(First, Third))); | 62 | 11.9k | } | 63 | 11.9k | Store(First, d, out0 + x); | 64 | 11.9k | Store(Second, d, out1 + x); | 65 | 11.9k | Store(Third, d, out2 + x); | 66 | 11.9k | } | 67 | 11.9k | } | 68 | 12.2k | for (; x < w; x++) { | 69 | 247 | if (transform_type == 6) { | 70 | 0 | pixel_type Y = in0[x]; | 71 | 0 | pixel_type Co = in1[x]; | 72 | 0 | pixel_type Cg = in2[x]; | 73 | 0 | pixel_type tmp = PixelAdd(Y, -(Cg >> 1)); | 74 | 0 | pixel_type G = PixelAdd(Cg, tmp); | 75 | 0 | pixel_type B = PixelAdd(tmp, -(Co >> 1)); | 76 | 0 | pixel_type R = PixelAdd(B, Co); | 77 | 0 | out0[x] = R; | 78 | 0 | out1[x] = G; | 79 | 0 | out2[x] = B; | 80 | 247 | } else { | 81 | 247 | pixel_type First = in0[x]; | 82 | 247 | pixel_type Second = in1[x]; | 83 | 247 | pixel_type Third = in2[x]; | 84 | 247 | if (third) Third = PixelAdd(Third, First); | 85 | 247 | if (second == 1) { | 86 | 0 | Second = PixelAdd(Second, First); | 87 | 247 | } else if (second == 2) { | 88 | 247 | Second = PixelAdd(Second, (PixelAdd(First, Third) >> 1)); | 89 | 247 | } | 90 | 247 | out0[x] = First; | 91 | 247 | out1[x] = Second; | 92 | 247 | out2[x] = Third; | 93 | 247 | } | 94 | 247 | } | 95 | 11.9k | } |
void jxl::N_AVX2::InvRCTRow<6>(int const*, int const*, int const*, int*, int*, int*, unsigned long) Line | Count | Source | 32 | 25.9k | pixel_type* out2, size_t w) { | 33 | 25.9k | static_assert(transform_type >= 0 && transform_type < 7, | 34 | 25.9k | "Invalid transform type"); | 35 | 25.9k | int second = transform_type >> 1; | 36 | 25.9k | int third = transform_type & 1; | 37 | | | 38 | 25.9k | size_t x = 0; | 39 | 25.9k | const HWY_FULL(pixel_type) d; | 40 | 25.9k | const size_t N = Lanes(d); | 41 | 326k | for (; x + N - 1 < w; x += N) { | 42 | 300k | if (transform_type == 6) { | 43 | 300k | auto Y = Load(d, in0 + x); | 44 | 300k | auto Co = Load(d, in1 + x); | 45 | 300k | auto Cg = Load(d, in2 + x); | 46 | 300k | Y = Sub(Y, ShiftRight<1>(Cg)); | 47 | 300k | auto G = Add(Cg, Y); | 48 | 300k | Y = Sub(Y, ShiftRight<1>(Co)); | 49 | 300k | auto R = Add(Y, Co); | 50 | 300k | Store(R, d, out0 + x); | 51 | 300k | Store(G, d, out1 + x); | 52 | 300k | Store(Y, d, out2 + x); | 53 | 300k | } else { | 54 | 0 | auto First = Load(d, in0 + x); | 55 | 0 | auto Second = Load(d, in1 + x); | 56 | 0 | auto Third = Load(d, in2 + x); | 57 | 0 | if (third) Third = Add(Third, First); | 58 | 0 | if (second == 1) { | 59 | 0 | Second = Add(Second, First); | 60 | 0 | } else if (second == 2) { | 61 | 0 | Second = Add(Second, ShiftRight<1>(Add(First, Third))); | 62 | 0 | } | 63 | 0 | Store(First, d, out0 + x); | 64 | 0 | Store(Second, d, out1 + x); | 65 | 0 | Store(Third, d, out2 + x); | 66 | 0 | } | 67 | 300k | } | 68 | 69.5k | for (; x < w; x++) { | 69 | 43.5k | if (transform_type == 6) { | 70 | 43.5k | pixel_type Y = in0[x]; | 71 | 43.5k | pixel_type Co = in1[x]; | 72 | 43.5k | pixel_type Cg = in2[x]; | 73 | 43.5k | pixel_type tmp = PixelAdd(Y, -(Cg >> 1)); | 74 | 43.5k | pixel_type G = PixelAdd(Cg, tmp); | 75 | 43.5k | pixel_type B = PixelAdd(tmp, -(Co >> 1)); | 76 | 43.5k | pixel_type R = PixelAdd(B, Co); | 77 | 43.5k | out0[x] = R; | 78 | 43.5k | out1[x] = G; | 79 | 43.5k | out2[x] = B; | 80 | 43.5k | } else { | 81 | 0 | pixel_type First = in0[x]; | 82 | 0 | pixel_type Second = in1[x]; | 83 | 0 | pixel_type Third = in2[x]; | 84 | 0 | if (third) Third = PixelAdd(Third, First); | 85 | 0 | if (second == 1) { | 86 | 0 | Second = PixelAdd(Second, First); | 87 | 0 | } else if (second == 2) { | 88 | 0 | Second = PixelAdd(Second, (PixelAdd(First, Third) >> 1)); | 89 | 0 | } | 90 | 0 | out0[x] = First; | 91 | 0 | out1[x] = Second; | 92 | 0 | out2[x] = Third; | 93 | 0 | } | 94 | 43.5k | } | 95 | 25.9k | } |
Unexecuted instantiation: void jxl::N_SSE2::InvRCTRow<0>(int const*, int const*, int const*, int*, int*, int*, unsigned long) Unexecuted instantiation: void jxl::N_SSE2::InvRCTRow<1>(int const*, int const*, int const*, int*, int*, int*, unsigned long) Unexecuted instantiation: void jxl::N_SSE2::InvRCTRow<2>(int const*, int const*, int const*, int*, int*, int*, unsigned long) Unexecuted instantiation: void jxl::N_SSE2::InvRCTRow<3>(int const*, int const*, int const*, int*, int*, int*, unsigned long) Unexecuted instantiation: void jxl::N_SSE2::InvRCTRow<4>(int const*, int const*, int const*, int*, int*, int*, unsigned long) Unexecuted instantiation: void jxl::N_SSE2::InvRCTRow<5>(int const*, int const*, int const*, int*, int*, int*, unsigned long) Unexecuted instantiation: void jxl::N_SSE2::InvRCTRow<6>(int const*, int const*, int const*, int*, int*, int*, unsigned long) |
96 | | |
97 | 2.48k | Status InvRCT(Image& input, size_t begin_c, size_t rct_type, ThreadPool* pool) { |
98 | 2.48k | JXL_RETURN_IF_ERROR(CheckEqualChannels(input, begin_c, begin_c + 2)); |
99 | 2.48k | size_t m = begin_c; |
100 | 2.48k | Channel& c0 = input.channel[m + 0]; |
101 | 2.48k | size_t w = c0.w; |
102 | 2.48k | size_t h = c0.h; |
103 | 2.48k | if (rct_type == 0) { // noop |
104 | 1.22k | return true; |
105 | 1.22k | } |
106 | | // Permutation: 0=RGB, 1=GBR, 2=BRG, 3=RBG, 4=GRB, 5=BGR |
107 | 1.25k | int permutation = rct_type / 7; |
108 | 1.25k | JXL_ENSURE(permutation < 6); |
109 | | // 0-5 values have the low bit corresponding to Third and the high bits |
110 | | // corresponding to Second. 6 corresponds to YCoCg. |
111 | | // |
112 | | // Second: 0=nop, 1=SubtractFirst, 2=SubtractAvgFirstThird |
113 | | // |
114 | | // Third: 0=nop, 1=SubtractFirst |
115 | 1.25k | int custom = rct_type % 7; |
116 | | // Special case: permute-only. Swap channels around. |
117 | 1.25k | if (custom == 0) { |
118 | 0 | Channel ch0 = std::move(input.channel[m]); |
119 | 0 | Channel ch1 = std::move(input.channel[m + 1]); |
120 | 0 | Channel ch2 = std::move(input.channel[m + 2]); |
121 | 0 | input.channel[m + (permutation % 3)] = std::move(ch0); |
122 | 0 | input.channel[m + ((permutation + 1 + permutation / 3) % 3)] = |
123 | 0 | std::move(ch1); |
124 | 0 | input.channel[m + ((permutation + 2 - permutation / 3) % 3)] = |
125 | 0 | std::move(ch2); |
126 | 0 | return true; |
127 | 0 | } |
128 | 1.25k | constexpr decltype(&InvRCTRow<0>) inv_rct_row[] = { |
129 | 1.25k | InvRCTRow<0>, InvRCTRow<1>, InvRCTRow<2>, InvRCTRow<3>, |
130 | 1.25k | InvRCTRow<4>, InvRCTRow<5>, InvRCTRow<6>}; |
131 | 1.25k | const auto process_row = [&](const uint32_t task, |
132 | 53.4k | size_t /* thread */) -> Status { |
133 | 53.4k | const size_t y = task; |
134 | 53.4k | const pixel_type* in0 = input.channel[m].Row(y); |
135 | 53.4k | const pixel_type* in1 = input.channel[m + 1].Row(y); |
136 | 53.4k | const pixel_type* in2 = input.channel[m + 2].Row(y); |
137 | 53.4k | pixel_type* out0 = input.channel[m + (permutation % 3)].Row(y); |
138 | 53.4k | pixel_type* out1 = |
139 | 53.4k | input.channel[m + ((permutation + 1 + permutation / 3) % 3)].Row(y); |
140 | 53.4k | pixel_type* out2 = |
141 | 53.4k | input.channel[m + ((permutation + 2 - permutation / 3) % 3)].Row(y); |
142 | 53.4k | inv_rct_row[custom](in0, in1, in2, out0, out1, out2, w); |
143 | 53.4k | return true; |
144 | 53.4k | }; Unexecuted instantiation: rct.cc:jxl::N_SSE4::InvRCT(jxl::Image&, unsigned long, unsigned long, jxl::ThreadPool*)::$_0::operator()(unsigned int, unsigned long) const rct.cc:jxl::N_AVX2::InvRCT(jxl::Image&, unsigned long, unsigned long, jxl::ThreadPool*)::$_0::operator()(unsigned int, unsigned long) const Line | Count | Source | 132 | 53.4k | size_t /* thread */) -> Status { | 133 | 53.4k | const size_t y = task; | 134 | 53.4k | const pixel_type* in0 = input.channel[m].Row(y); | 135 | 53.4k | const pixel_type* in1 = input.channel[m + 1].Row(y); | 136 | 53.4k | const pixel_type* in2 = input.channel[m + 2].Row(y); | 137 | 53.4k | pixel_type* out0 = input.channel[m + (permutation % 3)].Row(y); | 138 | 53.4k | pixel_type* out1 = | 139 | 53.4k | input.channel[m + ((permutation + 1 + permutation / 3) % 3)].Row(y); | 140 | 53.4k | pixel_type* out2 = | 141 | 53.4k | input.channel[m + ((permutation + 2 - permutation / 3) % 3)].Row(y); | 142 | 53.4k | inv_rct_row[custom](in0, in1, in2, out0, out1, out2, w); | 143 | 53.4k | return true; | 144 | 53.4k | }; |
Unexecuted instantiation: rct.cc:jxl::N_SSE2::InvRCT(jxl::Image&, unsigned long, unsigned long, jxl::ThreadPool*)::$_0::operator()(unsigned int, unsigned long) const |
145 | 1.25k | JXL_RETURN_IF_ERROR( |
146 | 1.25k | RunOnPool(pool, 0, h, ThreadPool::NoInit, process_row, "InvRCT")); |
147 | 1.25k | return true; |
148 | 1.25k | } Unexecuted instantiation: jxl::N_SSE4::InvRCT(jxl::Image&, unsigned long, unsigned long, jxl::ThreadPool*) jxl::N_AVX2::InvRCT(jxl::Image&, unsigned long, unsigned long, jxl::ThreadPool*) Line | Count | Source | 97 | 2.48k | Status InvRCT(Image& input, size_t begin_c, size_t rct_type, ThreadPool* pool) { | 98 | 2.48k | JXL_RETURN_IF_ERROR(CheckEqualChannels(input, begin_c, begin_c + 2)); | 99 | 2.48k | size_t m = begin_c; | 100 | 2.48k | Channel& c0 = input.channel[m + 0]; | 101 | 2.48k | size_t w = c0.w; | 102 | 2.48k | size_t h = c0.h; | 103 | 2.48k | if (rct_type == 0) { // noop | 104 | 1.22k | return true; | 105 | 1.22k | } | 106 | | // Permutation: 0=RGB, 1=GBR, 2=BRG, 3=RBG, 4=GRB, 5=BGR | 107 | 1.25k | int permutation = rct_type / 7; | 108 | 1.25k | JXL_ENSURE(permutation < 6); | 109 | | // 0-5 values have the low bit corresponding to Third and the high bits | 110 | | // corresponding to Second. 6 corresponds to YCoCg. | 111 | | // | 112 | | // Second: 0=nop, 1=SubtractFirst, 2=SubtractAvgFirstThird | 113 | | // | 114 | | // Third: 0=nop, 1=SubtractFirst | 115 | 1.25k | int custom = rct_type % 7; | 116 | | // Special case: permute-only. Swap channels around. | 117 | 1.25k | if (custom == 0) { | 118 | 0 | Channel ch0 = std::move(input.channel[m]); | 119 | 0 | Channel ch1 = std::move(input.channel[m + 1]); | 120 | 0 | Channel ch2 = std::move(input.channel[m + 2]); | 121 | 0 | input.channel[m + (permutation % 3)] = std::move(ch0); | 122 | 0 | input.channel[m + ((permutation + 1 + permutation / 3) % 3)] = | 123 | 0 | std::move(ch1); | 124 | 0 | input.channel[m + ((permutation + 2 - permutation / 3) % 3)] = | 125 | 0 | std::move(ch2); | 126 | 0 | return true; | 127 | 0 | } | 128 | 1.25k | constexpr decltype(&InvRCTRow<0>) inv_rct_row[] = { | 129 | 1.25k | InvRCTRow<0>, InvRCTRow<1>, InvRCTRow<2>, InvRCTRow<3>, | 130 | 1.25k | InvRCTRow<4>, InvRCTRow<5>, InvRCTRow<6>}; | 131 | 1.25k | const auto process_row = [&](const uint32_t task, | 132 | 1.25k | size_t /* thread */) -> Status { | 133 | 1.25k | const size_t y = task; | 134 | 1.25k | const pixel_type* in0 = input.channel[m].Row(y); | 135 | 1.25k | const pixel_type* in1 = input.channel[m + 1].Row(y); | 136 | 1.25k | const pixel_type* in2 = input.channel[m + 2].Row(y); | 137 | 1.25k | pixel_type* out0 = input.channel[m + (permutation % 3)].Row(y); | 138 | 1.25k | pixel_type* out1 = | 139 | 1.25k | input.channel[m + ((permutation + 1 + permutation / 3) % 3)].Row(y); | 140 | 1.25k | pixel_type* out2 = | 141 | 1.25k | input.channel[m + ((permutation + 2 - permutation / 3) % 3)].Row(y); | 142 | 1.25k | inv_rct_row[custom](in0, in1, in2, out0, out1, out2, w); | 143 | 1.25k | return true; | 144 | 1.25k | }; | 145 | 1.25k | JXL_RETURN_IF_ERROR( | 146 | 1.25k | RunOnPool(pool, 0, h, ThreadPool::NoInit, process_row, "InvRCT")); | 147 | 1.25k | return true; | 148 | 1.25k | } |
Unexecuted instantiation: jxl::N_SSE2::InvRCT(jxl::Image&, unsigned long, unsigned long, jxl::ThreadPool*) |
149 | | |
150 | | } // namespace HWY_NAMESPACE |
151 | | } // namespace jxl |
152 | | HWY_AFTER_NAMESPACE(); |
153 | | |
154 | | #if HWY_ONCE |
155 | | namespace jxl { |
156 | | |
157 | | HWY_EXPORT(InvRCT); |
158 | 2.48k | Status InvRCT(Image& input, size_t begin_c, size_t rct_type, ThreadPool* pool) { |
159 | 2.48k | return HWY_DYNAMIC_DISPATCH(InvRCT)(input, begin_c, rct_type, pool); |
160 | 2.48k | } |
161 | | |
162 | | } // namespace jxl |
163 | | #endif |