/src/Simd/src/Simd/SimdSse41ResizerNearest.cpp
Line | Count | Source |
1 | | /* |
2 | | * Simd Library (http://ermig1979.github.io/Simd). |
3 | | * |
4 | | * Copyright (c) 2011-2026 Yermalayeu Ihar. |
5 | | * |
6 | | * Permission is hereby granted, free of charge, to any person obtaining a copy |
7 | | * of this software and associated documentation files (the "Software"), to deal |
8 | | * in the Software without restriction, including without limitation the rights |
9 | | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
10 | | * copies of the Software, and to permit persons to whom the Software is |
11 | | * furnished to do so, subject to the following conditions: |
12 | | * |
13 | | * The above copyright notice and this permission notice shall be included in |
14 | | * all copies or substantial portions of the Software. |
15 | | * |
16 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
17 | | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
18 | | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
19 | | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
20 | | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
21 | | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
22 | | * SOFTWARE. |
23 | | */ |
24 | | #include "Simd/SimdMemory.h" |
25 | | #include "Simd/SimdStore.h" |
26 | | #include "Simd/SimdResizer.h" |
27 | | #include "Simd/SimdResizerCommon.h" |
28 | | #include "Simd/SimdCopy.h" |
29 | | #include "Simd/SimdParallel.hpp" |
30 | | |
31 | | namespace Simd |
32 | | { |
33 | | #ifdef SIMD_SSE41_ENABLE |
34 | | namespace Sse41 |
35 | | { |
36 | | ResizerNearest::ResizerNearest(const ResParam& param) |
37 | 0 | : Base::ResizerNearest(param) |
38 | 0 | , _blocks(0) |
39 | 0 | { |
40 | 0 | } |
41 | | |
42 | | size_t ResizerNearest::BlockCountMax(size_t align) |
43 | 0 | { |
44 | 0 | return (size_t)::ceil(float(Simd::Max(_param.srcW, _param.dstW) * _param.PixelSize()) / (align - _param.PixelSize())); |
45 | 0 | } |
46 | | |
// Prepares the lookup tables used by Run().
//
// The generic tables are always built by Base::ResizerNearest::EstimateParams().
// In addition, when both rows are at least A bytes long, the pixel is smaller
// than 4 bytes and the source is less than 4 times wider than the destination,
// the row is split into "blocks": each block records a source byte offset, a
// destination byte offset and a byte shuffle table, so that Shuffle16x1() can
// produce the block with one load, one byte shuffle and one store.
//
// A non-zero _blocks means the shuffle tables are already built, so the
// function returns immediately on later calls.
void ResizerNearest::EstimateParams()
{
    if (_blocks)
        return;
    Base::ResizerNearest::EstimateParams();
    const size_t pixelSize = _param.PixelSize();
    // Rows shorter than one vector (A bytes) are left to the non-shuffle paths.
    if (pixelSize *_param.dstW < A || pixelSize * _param.srcW < A)
        return;
    // Provisional value: an upper estimate used to size _ix16x1 below.
    if (pixelSize < 4 && _param.srcW < 4 * _param.dstW)
        _blocks = BlockCountMax(A);
    float scale = (float)_param.srcW / _param.dstW;
    if (_blocks)
    {
        _tails = 0;
        _ix16x1.Resize(_blocks);
        // Capacity for the tail masks (NOTE(review): presumably an upper bound on
        // the number of blocks that can start in the last A bytes - confirm).
        _tail16x1.Resize((size_t)::ceil(A * scale / pixelSize));
        size_t dstRowSize = _param.dstW * pixelSize;
        int block = 0;
        // The first block starts at the beginning of both rows.
        _ix16x1[0].src = 0;
        _ix16x1[0].dst = 0;
        for (int dstIndex = 0; dstIndex < (int)_param.dstW; ++dstIndex)
        {
            // _ix[dstIndex] is divided by the pixel size to get a pixel index, i.e.
            // _ix is treated here as a byte offset into the source row.
            int srcIndex = _ix[dstIndex] / (int)pixelSize;
            // Byte offsets of this pixel relative to the origin of the current block.
            int dst = dstIndex * (int)pixelSize - _ix16x1[block].dst;
            int src = srcIndex * (int)pixelSize - _ix16x1[block].src;
            // Start a new block as soon as either offset reaches A - pixelSize, so
            // every pixel of a block lies entirely inside the block's first A bytes.
            if (src >= int(A - pixelSize) || dst >= int(A - pixelSize))
            {
                block++;
                _ix16x1[block].src = srcIndex * (int)pixelSize;
                _ix16x1[block].dst = dstIndex * (int)pixelSize;
                // A block starting in the last A bytes of the destination row cannot
                // be stored as a full vector; record a mask built from the number of
                // bytes that remain (LeftNotZero8i is defined elsewhere - presumably
                // a mask with that many leading bytes set).
                if (_ix16x1[block].dst > int(dstRowSize - A))
                {
                    _tail16x1[_tails] = LeftNotZero8i(dstRowSize - _ix16x1[block].dst);
                    _tails++;
                }
                // The current pixel becomes the first pixel of the new block.
                dst = 0;
                src = srcIndex * (int)pixelSize - _ix16x1[block].src;
            }
            // One shuffle entry per byte of the pixel: destination byte (dst + i) of
            // the block is taken from source byte (src + i) of the block.
            for(size_t i = 0; i < pixelSize; ++i)
                _ix16x1[block].shuffle[dst + i] = uint8_t(src + i);
        }
        // Replace the provisional estimate with the number of blocks actually used.
        _blocks = block + 1;
    }
}
91 | | |
92 | | void ResizerNearest::Shuffle16x1(const uint8_t* src, size_t srcStride, size_t dyBeg, size_t dyEnd, uint8_t* dst, size_t dstStride) |
93 | 0 | { |
94 | 0 | size_t body = _blocks - _tails; |
95 | 0 | for (size_t dy = dyBeg; dy < dyEnd; dy++) |
96 | 0 | { |
97 | 0 | const uint8_t* srcRow = src + _iy[dy] * srcStride; |
98 | 0 | size_t i = 0, t = 0; |
99 | 0 | for (; i < body; ++i) |
100 | 0 | { |
101 | 0 | const IndexShuffle16x1& index = _ix16x1[i]; |
102 | 0 | __m128i _src = _mm_loadu_si128((__m128i*)(srcRow + index.src)); |
103 | 0 | __m128i _shuffle = _mm_loadu_si128((__m128i*) & index.shuffle); |
104 | 0 | _mm_storeu_si128((__m128i*)(dst + index.dst), _mm_shuffle_epi8(_src, _shuffle)); |
105 | 0 | } |
106 | 0 | for (; i < _blocks; ++i, t++) |
107 | 0 | { |
108 | 0 | const IndexShuffle16x1& index = _ix16x1[i]; |
109 | 0 | __m128i _src = _mm_loadu_si128((__m128i*)(srcRow + index.src)); |
110 | 0 | __m128i _shuffle = _mm_loadu_si128((__m128i*) & index.shuffle); |
111 | 0 | StoreMasked<false>((__m128i*)(dst + index.dst), _mm_shuffle_epi8(_src, _shuffle), _tail16x1[t]); |
112 | 0 | } |
113 | 0 | dst += dstStride; |
114 | 0 | } |
115 | 0 | } |
116 | | |
117 | | SIMD_INLINE void CopyPixel12(const uint8_t* src, uint8_t* dst) |
118 | 0 | { |
119 | 0 | __m128i val = _mm_loadu_si128((__m128i*)src); |
120 | 0 | _mm_storeu_si128((__m128i*)dst, val); |
121 | 0 | } |
122 | | |
123 | | void ResizerNearest::Resize12(const uint8_t* src, size_t srcStride, size_t dyBeg, size_t dyEnd, uint8_t* dst, size_t dstStride) |
124 | 0 | { |
125 | 0 | size_t body = _param.dstW - 1; |
126 | 0 | for (size_t dy = dyBeg; dy < dyEnd; dy++) |
127 | 0 | { |
128 | 0 | const uint8_t* srcRow = src + _iy[dy] * srcStride; |
129 | 0 | size_t dx = 0, offset = 0; |
130 | 0 | for (; dx < body; dx++, offset += 12) |
131 | 0 | CopyPixel12(srcRow + _ix[dx], dst + offset); |
132 | 0 | Base::CopyPixel<12>(srcRow + _ix[dx], dst + offset); |
133 | 0 | dst += dstStride; |
134 | 0 | } |
135 | 0 | } |
136 | | |
137 | | void ResizerNearest::Run(const uint8_t* src, size_t srcStride, uint8_t* dst, size_t dstStride) |
138 | 0 | { |
139 | 0 | EstimateParams(); |
140 | 0 | if (_blocks) |
141 | 0 | { |
142 | 0 | Simd::Parallel(0, _param.dstH, [&](size_t thread, size_t dstBeg, size_t dstEnd) |
143 | 0 | { |
144 | 0 | this->Shuffle16x1(src, srcStride, dstBeg, dstEnd, dst + dstBeg * dstStride, dstStride); |
145 | 0 | }, _threads, 1); |
146 | 0 | } |
147 | 0 | else if (_pixelSize == 12) |
148 | 0 | { |
149 | 0 | Simd::Parallel(0, _param.dstH, [&](size_t thread, size_t dstBeg, size_t dstEnd) |
150 | 0 | { |
151 | 0 | this->Resize12(src, srcStride, dstBeg, dstEnd, dst + dstBeg * dstStride, dstStride); |
152 | 0 | }, _threads, 1); |
153 | 0 | } |
154 | 0 | else |
155 | 0 | Base::ResizerNearest::Run(src, srcStride, dst, dstStride); |
156 | 0 | } |
157 | | } |
158 | | #endif |
159 | | } |
160 | | |