/src/libjxl/lib/jxl/inverse_mtf-inl.h
Line | Count | Source |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | // SIMDified inverse-move-to-front transform. |
7 | | |
8 | | #include <cstdint> |
9 | | |
10 | | #include "lib/jxl/base/sanitizer_definitions.h" |
11 | | |
12 | | #if defined(LIB_JXL_INVERSE_MTF_INL_H_) == defined(HWY_TARGET_TOGGLE) |
13 | | #ifdef LIB_JXL_INVERSE_MTF_INL_H_ |
14 | | #undef LIB_JXL_INVERSE_MTF_INL_H_ |
15 | | #else |
16 | | #define LIB_JXL_INVERSE_MTF_INL_H_ |
17 | | #endif |
18 | | |
19 | | #include <hwy/highway.h> |
20 | | |
21 | | HWY_BEFORE_NAMESPACE(); |
22 | | namespace jxl { |
23 | | namespace HWY_NAMESPACE { |
24 | | |
25 | | // These templates are not found via ADL. |
26 | | using hwy::HWY_NAMESPACE::FirstN; |
27 | | using hwy::HWY_NAMESPACE::IfThenElse; |
28 | | using hwy::HWY_NAMESPACE::Load; |
29 | | using hwy::HWY_NAMESPACE::LoadU; |
30 | | using hwy::HWY_NAMESPACE::StoreU; |
31 | | |
32 | 123k | inline void MoveToFront(uint8_t* v, uint8_t index) { |
33 | 123k | uint8_t value = v[index]; |
34 | 123k | uint8_t i = index; |
35 | 123k | if (i < 4) { |
36 | 265k | for (; i; --i) v[i] = v[i - 1]; |
37 | 94.6k | } else { |
38 | 29.1k | const HWY_CAPPED(uint8_t, 64) d; |
39 | 29.1k | int tail = i & (Lanes(d) - 1); |
40 | 29.1k | if (tail) { |
41 | 27.3k | i -= tail; |
42 | 27.3k | const auto vec = Load(d, v + i); |
43 | 27.3k | const auto prev = LoadU(d, v + i + 1); |
44 | 27.3k | StoreU(IfThenElse(FirstN(d, tail), vec, prev), d, v + i + 1); |
45 | 27.3k | } |
46 | 212k | while (i) { |
47 | 183k | i -= Lanes(d); |
48 | 183k | const auto vec = Load(d, v + i); |
49 | 183k | StoreU(vec, d, v + i + 1); |
50 | 183k | } |
51 | 29.1k | } |
52 | 123k | v[0] = value; |
53 | 123k | } |
54 | | |
55 | 3.33k | inline void InverseMoveToFrontTransform(uint8_t* v, int v_len) { |
56 | 3.33k | HWY_ALIGN uint8_t mtf[256 + 64]; |
57 | 3.33k | int i; |
58 | 857k | for (i = 0; i < 256; ++i) { |
59 | 854k | mtf[i] = static_cast<uint8_t>(i); |
60 | 854k | } |
61 | | #if JXL_MEMORY_SANITIZER |
62 | | const HWY_CAPPED(uint8_t, 64) d; |
63 | | for (size_t j = 0; j < Lanes(d); ++j) { |
64 | | mtf[256 + j] = 0; |
65 | | } |
66 | | #endif // JXL_MEMORY_SANITIZER |
67 | 354k | for (i = 0; i < v_len; ++i) { |
68 | 351k | uint8_t index = v[i]; |
69 | 351k | v[i] = mtf[index]; |
70 | 351k | if (index) MoveToFront(mtf, index); |
71 | 351k | } |
72 | 3.33k | } |
73 | | |
74 | | // NOLINTNEXTLINE(google-readability-namespace-comments) |
75 | | } // namespace HWY_NAMESPACE |
76 | | } // namespace jxl |
77 | | HWY_AFTER_NAMESPACE(); |
78 | | |
79 | | #endif // LIB_JXL_INVERSE_MTF_INL_H_ |
80 | | |
81 | | #if HWY_ONCE |
82 | | #ifndef INVERSE_MTF_ONCE |
83 | | #define INVERSE_MTF_ONCE |
84 | | |
85 | | namespace jxl { |
86 | 3.33k | inline void InverseMoveToFrontTransform(uint8_t* v, int v_len) { |
87 | 3.33k | HWY_STATIC_DISPATCH(InverseMoveToFrontTransform)(v, v_len); |
88 | 3.33k | } |
89 | | } // namespace jxl |
90 | | |
91 | | #endif // INVERSE_MTF_ONCE |
92 | | #endif // HWY_ONCE |