/src/libjxl/lib/jxl/convolve-inl.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | #include <cstddef> |
7 | | #include <cstdint> |
8 | | |
9 | | #include "lib/jxl/base/compiler_specific.h" |
10 | | |
11 | | #if defined(LIB_JXL_CONVOLVE_INL_H_) == defined(HWY_TARGET_TOGGLE) |
12 | | #ifdef LIB_JXL_CONVOLVE_INL_H_ |
13 | | #undef LIB_JXL_CONVOLVE_INL_H_ |
14 | | #else |
15 | | #define LIB_JXL_CONVOLVE_INL_H_ |
16 | | #endif |
17 | | |
18 | | #include <hwy/highway.h> |
19 | | |
20 | | #if HWY_TARGET <= (1 << HWY_HIGHEST_TARGET_BIT_X86) |
21 | | #include <xmmintrin.h> |
22 | | #endif |
23 | | |
24 | | #include "lib/jxl/base/data_parallel.h" |
25 | | #include "lib/jxl/base/rect.h" |
26 | | #include "lib/jxl/base/status.h" |
27 | | #include "lib/jxl/image.h" |
28 | | #include "lib/jxl/image_ops.h" |
29 | | |
30 | | HWY_BEFORE_NAMESPACE(); |
31 | | namespace jxl { |
32 | | namespace HWY_NAMESPACE { |
33 | | namespace { |
34 | | |
35 | | // Import these Highway templates by name: they are not found via ADL (argument-dependent lookup). |
36 | | using hwy::HWY_NAMESPACE::TableLookupLanes; |
37 | | using hwy::HWY_NAMESPACE::Vec; |
38 | | |
39 | | // Synthesizes left/right neighbors from a vector of center pixels. The helpers visible here build left neighbors by permuting lanes of the center vector, mirroring at lane 0. |
40 | | class Neighbors { |
41 | | public: |
42 | | using D = HWY_CAPPED(float, 16); |
43 | | using V = Vec<D>; |
44 | | |
45 | | // Returns l such that l[i] == c[Mirror(i - 1)]: c moved up by one lane, with lane 0 duplicated because index -1 mirrors back to 0. |
46 | 0 | HWY_INLINE HWY_MAYBE_UNUSED static V FirstL1(const V c) { |
47 | | #if HWY_CAP_GE256 |
48 | | const D d; |
49 | 0 | HWY_ALIGN constexpr int32_t lanes[16] = {0, 0, 1, 2, 3, 4, 5, 6, |
50 | | 7, 8, 9, 10, 11, 12, 13, 14}; |
51 | | const auto indices = SetTableIndices(d, lanes); |
52 | | // c = PONM'LKJI (diagram shows 8 lanes; lane 0 is the rightmost letter, I) |
53 | | return TableLookupLanes(c, indices); // ONML'KJII (result lane i takes c[lanes[i]]) |
54 | | #elif HWY_TARGET == HWY_SCALAR |
55 | | return c; // Same: with a single lane, l[0] == c[Mirror(-1)] == c[0] (the first mirrored value is the last valid one) |
56 | | #else // 128 bit (four float lanes) |
57 | | // c = LKJI (lane 0 is the rightmost letter, I) |
58 | | #if HWY_TARGET <= (1 << HWY_HIGHEST_TARGET_BIT_X86) |
59 | | return V{_mm_shuffle_ps(c.raw, c.raw, _MM_SHUFFLE(2, 1, 0, 0))}; // KJII (_MM_SHUFFLE lists source lanes from highest to lowest, i.e. lanes {0, 0, 1, 2}) |
60 | | #else |
61 | | const D d; |
62 | | // TODO(deymo): Figure out if this can be optimized using a single vsri |
63 | | // instruction to convert LKJI to KJII. |
64 | | HWY_ALIGN constexpr int lanes[4] = {0, 0, 1, 2}; // KJII (same permutation as the x86 shuffle above) |
65 | | const auto indices = SetTableIndices(d, lanes); |
66 | | return TableLookupLanes(c, indices); |
67 | | #endif |
68 | | #endif |
69 | 0 | } Unexecuted instantiation: enc_convolve_separable5.cc:jxl::N_SSE4::(anonymous namespace)::Neighbors::FirstL1(hwy::N_SSE4::Vec128<float, 4ul>) Unexecuted instantiation: enc_convolve_separable5.cc:jxl::N_AVX2::(anonymous namespace)::Neighbors::FirstL1(hwy::N_AVX2::Vec256<float>) Unexecuted instantiation: enc_convolve_separable5.cc:jxl::N_SSE2::(anonymous namespace)::Neighbors::FirstL1(hwy::N_SSE2::Vec128<float, 4ul>) |
70 | | |
71 | | // Returns l such that l[i] == c[Mirror(i - 2)]: c moved up by two lanes; lanes 0 and 1 receive c[1] and c[0] (indices -2 and -1 mirrored). The HWY_SCALAR build instead calls JXL_DEBUG_ABORT("Unsupported") and returns Zero(d). |
72 | 0 | HWY_INLINE HWY_MAYBE_UNUSED static V FirstL2(const V c) { |
73 | | #if HWY_CAP_GE256 |
74 | | const D d; |
75 | 0 | HWY_ALIGN constexpr int32_t lanes[16] = {1, 0, 0, 1, 2, 3, 4, 5, |
76 | | 6, 7, 8, 9, 10, 11, 12, 13}; |
77 | | const auto indices = SetTableIndices(d, lanes); |
78 | | // c = PONM'LKJI (diagram shows 8 lanes; lane 0 is the rightmost letter, I) |
79 | | return TableLookupLanes(c, indices); // NMLK'JIIJ (result lane i takes c[lanes[i]]) |
80 | | #elif HWY_TARGET == HWY_SCALAR |
81 | | const D d; |
82 | | JXL_DEBUG_ABORT("Unsupported"); |
83 | | return Zero(d); |
84 | | #else // 128 bit (four float lanes) |
85 | | // c = LKJI (lane 0 is the rightmost letter, I) |
86 | | #if HWY_TARGET <= (1 << HWY_HIGHEST_TARGET_BIT_X86) |
87 | | return V{_mm_shuffle_ps(c.raw, c.raw, _MM_SHUFFLE(1, 0, 0, 1))}; // JIIJ (_MM_SHUFFLE lists source lanes from highest to lowest, i.e. lanes {1, 0, 0, 1}) |
88 | | #else |
89 | | const D d; |
90 | | HWY_ALIGN constexpr int lanes[4] = {1, 0, 0, 1}; // JIIJ (same permutation as the x86 shuffle above) |
91 | | const auto indices = SetTableIndices(d, lanes); |
92 | | return TableLookupLanes(c, indices); |
93 | | #endif |
94 | | #endif |
95 | 0 | } Unexecuted instantiation: enc_convolve_separable5.cc:jxl::N_SSE4::(anonymous namespace)::Neighbors::FirstL2(hwy::N_SSE4::Vec128<float, 4ul>) Unexecuted instantiation: enc_convolve_separable5.cc:jxl::N_AVX2::(anonymous namespace)::Neighbors::FirstL2(hwy::N_AVX2::Vec256<float>) Unexecuted instantiation: enc_convolve_separable5.cc:jxl::N_SSE2::(anonymous namespace)::Neighbors::FirstL2(hwy::N_SSE2::Vec128<float, 4ul>) |
96 | | }; |
97 | | |
98 | | } // namespace |
99 | | // NOLINTNEXTLINE(google-readability-namespace-comments) |
100 | | } // namespace HWY_NAMESPACE |
101 | | } // namespace jxl |
102 | | HWY_AFTER_NAMESPACE(); |
103 | | |
104 | | #endif // LIB_JXL_CONVOLVE_INL_H_ |