/src/vvdec/source/Lib/CommonLib/x86/PictureX86.h
Line | Count | Source |
1 | | /* ----------------------------------------------------------------------------- |
2 | | The copyright in this software is being made available under the Clear BSD |
3 | | License, included below. No patent rights, trademark rights and/or |
4 | | other Intellectual Property Rights other than the copyrights concerning |
5 | | the Software are granted under this license. |
6 | | |
7 | | The Clear BSD License |
8 | | |
9 | | Copyright (c) 2018-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVdeC Authors. |
10 | | All rights reserved. |
11 | | |
12 | | Redistribution and use in source and binary forms, with or without modification, |
13 | | are permitted (subject to the limitations in the disclaimer below) provided that |
14 | | the following conditions are met: |
15 | | |
16 | | * Redistributions of source code must retain the above copyright notice, |
17 | | this list of conditions and the following disclaimer. |
18 | | |
19 | | * Redistributions in binary form must reproduce the above copyright |
20 | | notice, this list of conditions and the following disclaimer in the |
21 | | documentation and/or other materials provided with the distribution. |
22 | | |
23 | | * Neither the name of the copyright holder nor the names of its |
24 | | contributors may be used to endorse or promote products derived from this |
25 | | software without specific prior written permission. |
26 | | |
27 | | NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY |
28 | | THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
29 | | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
30 | | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A |
31 | | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR |
32 | | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
33 | | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
34 | | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
35 | | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER |
36 | | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
37 | | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
38 | | POSSIBILITY OF SUCH DAMAGE. |
39 | | |
40 | | |
41 | | ------------------------------------------------------------------------------------------- */ |
42 | | |
/** \file     PictureX86.h
    \brief    SIMD for Picture border padding
*/
46 | | |
47 | | //! \ingroup CommonLib |
48 | | //! \{ |
49 | | |
50 | | |
51 | | #include "CommonLib/CommonDef.h" |
52 | | #include "CommonDefX86.h" |
53 | | #include "CommonLib/Picture.h" |
54 | | |
55 | | #if ENABLE_SIMD_OPT_PICTURE |
56 | | #ifdef TARGET_SIMD_X86 |
57 | | |
58 | | namespace vvdec |
59 | | { |
60 | | |
61 | | template<X86_VEXT vext> |
62 | | void paddPicBorderLeftRightSIMD(Pel *pi, ptrdiff_t stride,int width,int xmargin,int height) |
63 | 0 | { |
64 | 0 | __m128i xleft; |
65 | 0 | __m128i xright; |
66 | |
|
67 | 0 | for (int i=1;i<height-1;i++) |
68 | 0 | { |
69 | 0 | xleft = _mm_set1_epi16( pi[0] ); |
70 | 0 | xright = _mm_set1_epi16( pi[width - 1] ); |
71 | |
|
72 | 0 | int temp=xmargin; |
73 | 0 | int x=0; |
74 | 0 | while ((temp >> 3) > 0) |
75 | 0 | { |
76 | 0 | _mm_storeu_si128((__m128i*)&pi[-xmargin + x], xleft); |
77 | 0 | _mm_storeu_si128((__m128i*)&pi[width + x], xright); |
78 | 0 | x+=8; |
79 | 0 | temp-=8; |
80 | 0 | } |
81 | 0 | while ((temp >> 2) > 0) |
82 | 0 | { |
83 | 0 | _mm_storeu_si64((__m128i*)&pi[-xmargin + x], xleft); |
84 | 0 | _mm_storeu_si64((__m128i*)&pi[width + x], xright); |
85 | 0 | x+=4; |
86 | 0 | temp-=4; |
87 | 0 | } |
88 | 0 | while ((temp >> 1) > 0) |
89 | 0 | { |
90 | 0 | _mm_storeu_si32(( __m128i * )&pi[-xmargin + x], xleft); |
91 | 0 | _mm_storeu_si32(( __m128i * )&pi[width + x], xright); |
92 | 0 | x+=2; |
93 | 0 | temp-=2; |
94 | 0 | } |
95 | 0 | pi += stride; |
96 | 0 | } |
97 | 0 | } Unexecuted instantiation: void vvdec::paddPicBorderLeftRightSIMD<(vvdec::x86_simd::X86_VEXT)1>(short*, long, int, int, int) Unexecuted instantiation: void vvdec::paddPicBorderLeftRightSIMD<(vvdec::x86_simd::X86_VEXT)4>(short*, long, int, int, int) |
98 | | |
99 | | template<X86_VEXT vext> |
100 | | void paddPicBorderBotSIMD( Pel *pi, ptrdiff_t stride, int width, int xmargin, int ymargin ) |
101 | 0 | { |
102 | 0 | paddPicBorderLeftRightSIMD<vext>( pi, stride, width, xmargin, 3 ); |
103 | |
|
104 | 0 | pi -= xmargin; |
105 | |
|
106 | 0 | __m128i x8; |
107 | | #ifdef USE_AVX2 |
108 | | __m256i v16; |
109 | | #endif |
110 | 0 | int j, temp; |
111 | 0 | for( int i = 1; i <= ymargin; i++ ) |
112 | 0 | { |
113 | 0 | j = 0; |
114 | 0 | temp = width + ( xmargin << 1 ); |
115 | | #ifdef USE_AVX2 |
116 | 0 | while( ( temp >> 4 ) > 0 ) |
117 | 0 | { |
118 | 0 | v16 = _mm256_loadu_si256( ( __m256i* )( pi + j ) ); |
119 | 0 | _mm256_storeu_si256( ( __m256i* )( pi + j + i * stride ), v16 ); |
120 | 0 | j = j + 16; |
121 | 0 | temp = temp - 16; |
122 | 0 | } |
123 | | #endif |
124 | 0 | while( ( temp >> 3 ) > 0 ) |
125 | 0 | { |
126 | 0 | x8 = _mm_loadu_si128( ( __m128i* )( pi + j ) ); |
127 | 0 | _mm_storeu_si128( ( __m128i* )( pi + j + i * stride ), x8 ); |
128 | 0 | j = j + 8; |
129 | 0 | temp = temp - 8; |
130 | 0 | } |
131 | 0 | while( ( temp >> 2 ) > 0 ) |
132 | 0 | { |
133 | 0 | x8 = _mm_loadu_si64( ( __m128i * )( pi + j ) ); |
134 | 0 | _mm_storeu_si64( ( __m128i* )( pi + j + i * stride ), x8 ); |
135 | 0 | j = j + 4; |
136 | 0 | temp = temp - 4; |
137 | 0 | } |
138 | 0 | while( ( temp >> 1 ) > 0 ) |
139 | 0 | { |
140 | 0 | x8 = _mm_loadu_si32( ( __m128i * )( pi + j ) ); |
141 | 0 | _mm_storeu_si32( ( __m128i * )( pi + j + i * stride ), x8 ); |
142 | 0 | j += 2; |
143 | 0 | temp -= 2; |
144 | 0 | } |
145 | 0 | } |
146 | | #if USE_AVX2 |
147 | | |
148 | | _mm256_zeroupper(); |
149 | | #endif |
150 | 0 | } Unexecuted instantiation: void vvdec::paddPicBorderBotSIMD<(vvdec::x86_simd::X86_VEXT)1>(short*, long, int, int, int) Unexecuted instantiation: void vvdec::paddPicBorderBotSIMD<(vvdec::x86_simd::X86_VEXT)4>(short*, long, int, int, int) |
151 | | |
152 | | template<X86_VEXT vext> |
153 | | void paddPicBorderTopSIMD( Pel *pi, ptrdiff_t stride, int width, int xmargin, int ymargin ) |
154 | 0 | { |
155 | 0 | paddPicBorderLeftRightSIMD<vext>( pi, stride, width, xmargin, 3 ); |
156 | |
|
157 | 0 | pi -= xmargin; |
158 | |
|
159 | 0 | __m128i x8; |
160 | | #ifdef USE_AVX2 |
161 | | __m256i v16; |
162 | | #endif |
163 | 0 | int j, temp; |
164 | 0 | for( int i = 1; i <= ymargin; i++ ) |
165 | 0 | { |
166 | 0 | j = 0; |
167 | 0 | temp = width + ( xmargin << 1 ); |
168 | | #ifdef USE_AVX2 |
169 | 0 | while( ( temp >> 4 ) > 0 ) |
170 | 0 | { |
171 | 0 | v16 = _mm256_loadu_si256( ( __m256i* )( pi + j ) ); |
172 | 0 | _mm256_storeu_si256( ( __m256i* )( pi + j - i * stride ), v16 ); |
173 | 0 | j = j + 16; |
174 | 0 | temp = temp - 16; |
175 | 0 | } |
176 | | #endif |
177 | 0 | while( ( temp >> 3 ) > 0 ) |
178 | 0 | { |
179 | 0 | x8 = _mm_loadu_si128( ( __m128i* )( pi + j ) ); |
180 | 0 | _mm_storeu_si128( ( __m128i* )( pi + j - i * stride ), x8 ); |
181 | 0 | j = j + 8; |
182 | 0 | temp = temp - 8; |
183 | 0 | } |
184 | 0 | while( ( temp >> 2 ) > 0 ) |
185 | 0 | { |
186 | 0 | x8 = _mm_loadu_si64( ( __m128i * )( pi + j ) ); |
187 | 0 | _mm_storeu_si64( ( __m128i* )( pi + j - i * stride ), x8 ); |
188 | 0 | j = j + 4; |
189 | 0 | temp = temp - 4; |
190 | 0 | } |
191 | 0 | while( ( temp >> 1 ) > 0 ) |
192 | 0 | { |
193 | 0 | x8 = _mm_loadu_si32( ( __m128i * )( pi + j ) ); |
194 | 0 | _mm_storeu_si32( ( __m128i * )( pi + j - i * stride ), x8 ); |
195 | 0 | j += 2; |
196 | 0 | temp -= 2; |
197 | 0 | } |
198 | 0 | } |
199 | | #if USE_AVX2 |
200 | | |
201 | | _mm256_zeroupper(); |
202 | | #endif |
203 | 0 | } Unexecuted instantiation: void vvdec::paddPicBorderTopSIMD<(vvdec::x86_simd::X86_VEXT)1>(short*, long, int, int, int) Unexecuted instantiation: void vvdec::paddPicBorderTopSIMD<(vvdec::x86_simd::X86_VEXT)4>(short*, long, int, int, int) |
204 | | |
205 | | template<X86_VEXT vext> |
206 | | void Picture::_initPictureX86() |
207 | 0 | { |
208 | 0 | paddPicBorderBot = paddPicBorderBotSIMD<vext>; |
209 | 0 | paddPicBorderTop = paddPicBorderTopSIMD<vext>; |
210 | 0 | paddPicBorderLeftRight = paddPicBorderLeftRightSIMD<vext>; |
211 | 0 | } Unexecuted instantiation: void vvdec::Picture::_initPictureX86<(vvdec::x86_simd::X86_VEXT)1>() Unexecuted instantiation: void vvdec::Picture::_initPictureX86<(vvdec::x86_simd::X86_VEXT)4>() |
212 | | template void Picture::_initPictureX86<SIMDX86>(); |
213 | | |
214 | | } |
215 | | |
216 | | #endif // TARGET_SIMD_X86 |
217 | | #endif |
218 | | //! \} |