/src/qtbase/src/gui/painting/qimagescale_sse4.cpp
Line | Count | Source |
1 | | // Copyright (C) 2016 The Qt Company Ltd. |
2 | | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
3 | | |
4 | | #include "qimagescale_p.h" |
5 | | #include "qimage.h" |
6 | | #include <private/qdrawhelper_x86_p.h> |
7 | | #include <private/qsimd_p.h> |
8 | | |
9 | | #if defined(QT_COMPILER_SUPPORTS_SSE4_1) |
10 | | |
11 | | QT_BEGIN_NAMESPACE |
12 | | |
13 | | using namespace QImageScale; |
14 | | |
15 | | inline static __m128i Q_DECL_VECTORCALL |
16 | | qt_qimageScaleAARGBA_helper(const unsigned int *pix, int xyap, int Cxy, int step, const __m128i vxyap, const __m128i vCxy) |
17 | 0 | { |
18 | 0 | __m128i vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix)); |
19 | 0 | __m128i vx = _mm_mullo_epi32(vpix, vxyap); |
20 | 0 | int i; |
21 | 0 | for (i = (1 << 14) - xyap; i > Cxy; i -= Cxy) { |
22 | 0 | pix += step; |
23 | 0 | vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix)); |
24 | 0 | vx = _mm_add_epi32(vx, _mm_mullo_epi32(vpix, vCxy)); |
25 | 0 | } |
26 | 0 | pix += step; |
27 | 0 | vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix)); |
28 | 0 | vx = _mm_add_epi32(vx, _mm_mullo_epi32(vpix, _mm_set1_epi32(i))); |
29 | 0 | return vx; |
30 | 0 | } |
31 | | |
32 | | template<bool RGB> |
33 | | void qt_qimageScaleAARGBA_up_x_down_y_sse4(QImageScaleInfo *isi, unsigned int *dest, |
34 | | int dw, int dh, int dow, int sow) |
35 | 0 | { |
36 | 0 | const unsigned int **ypoints = isi->ypoints; |
37 | 0 | const int *xpoints = isi->xpoints; |
38 | 0 | const int *xapoints = isi->xapoints; |
39 | 0 | const int *yapoints = isi->yapoints; |
40 | |
|
41 | 0 | const __m128i v256 = _mm_set1_epi32(256); |
42 | | |
43 | | /* go through every scanline in the output buffer */ |
44 | 0 | auto scaleSection = [&] (int yStart, int yEnd) { |
45 | 0 | for (int y = yStart; y < yEnd; ++y) { |
46 | 0 | const int Cy = yapoints[y] >> 16; |
47 | 0 | const int yap = yapoints[y] & 0xffff; |
48 | 0 | const __m128i vCy = _mm_set1_epi32(Cy); |
49 | 0 | const __m128i vyap = _mm_set1_epi32(yap); |
50 | |
|
51 | 0 | unsigned int *dptr = dest + (y * dow); |
52 | 0 | for (int x = 0; x < dw; x++) { |
53 | 0 | const unsigned int *sptr = ypoints[y] + xpoints[x]; |
54 | 0 | __m128i vx = qt_qimageScaleAARGBA_helper(sptr, yap, Cy, sow, vyap, vCy); |
55 | |
|
56 | 0 | const int xap = xapoints[x]; |
57 | 0 | if (xap > 0) { |
58 | 0 | const __m128i vxap = _mm_set1_epi32(xap); |
59 | 0 | const __m128i vinvxap = _mm_sub_epi32(v256, vxap); |
60 | 0 | __m128i vr = qt_qimageScaleAARGBA_helper(sptr + 1, yap, Cy, sow, vyap, vCy); |
61 | |
|
62 | 0 | vx = _mm_mullo_epi32(vx, vinvxap); |
63 | 0 | vr = _mm_mullo_epi32(vr, vxap); |
64 | 0 | vx = _mm_add_epi32(vx, vr); |
65 | 0 | vx = _mm_srli_epi32(vx, 8); |
66 | 0 | } |
67 | 0 | vx = _mm_srli_epi32(vx, 14); |
68 | 0 | vx = _mm_packus_epi32(vx, vx); |
69 | 0 | vx = _mm_packus_epi16(vx, vx); |
70 | 0 | *dptr = _mm_cvtsi128_si32(vx); |
71 | 0 | if (RGB) |
72 | 0 | *dptr |= 0xff000000; |
73 | 0 | dptr++; |
74 | 0 | } |
75 | 0 | } |
76 | 0 | }; Unexecuted instantiation: qt_qimageScaleAARGBA_up_x_down_y_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}::operator()(int, int) constUnexecuted instantiation: qt_qimageScaleAARGBA_up_x_down_y_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}::operator()(int, int) const |
77 | 0 | multithread_pixels_function(isi, dh, scaleSection); |
78 | 0 | } Unexecuted instantiation: void qt_qimageScaleAARGBA_up_x_down_y_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int) Unexecuted instantiation: void qt_qimageScaleAARGBA_up_x_down_y_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int) |
79 | | |
80 | | template<bool RGB> |
81 | | void qt_qimageScaleAARGBA_down_x_up_y_sse4(QImageScaleInfo *isi, unsigned int *dest, |
82 | | int dw, int dh, int dow, int sow) |
83 | 0 | { |
84 | 0 | const unsigned int **ypoints = isi->ypoints; |
85 | 0 | int *xpoints = isi->xpoints; |
86 | 0 | int *xapoints = isi->xapoints; |
87 | 0 | int *yapoints = isi->yapoints; |
88 | |
|
89 | 0 | const __m128i v256 = _mm_set1_epi32(256); |
90 | | |
91 | | /* go through every scanline in the output buffer */ |
92 | 0 | auto scaleSection = [&] (int yStart, int yEnd) { |
93 | 0 | for (int y = yStart; y < yEnd; ++y) { |
94 | 0 | unsigned int *dptr = dest + (y * dow); |
95 | 0 | for (int x = 0; x < dw; x++) { |
96 | 0 | int Cx = xapoints[x] >> 16; |
97 | 0 | int xap = xapoints[x] & 0xffff; |
98 | 0 | const __m128i vCx = _mm_set1_epi32(Cx); |
99 | 0 | const __m128i vxap = _mm_set1_epi32(xap); |
100 | |
|
101 | 0 | const unsigned int *sptr = ypoints[y] + xpoints[x]; |
102 | 0 | __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx); |
103 | |
|
104 | 0 | int yap = yapoints[y]; |
105 | 0 | if (yap > 0) { |
106 | 0 | const __m128i vyap = _mm_set1_epi32(yap); |
107 | 0 | const __m128i vinvyap = _mm_sub_epi32(v256, vyap); |
108 | 0 | __m128i vr = qt_qimageScaleAARGBA_helper(sptr + sow, xap, Cx, 1, vxap, vCx); |
109 | |
|
110 | 0 | vx = _mm_mullo_epi32(vx, vinvyap); |
111 | 0 | vr = _mm_mullo_epi32(vr, vyap); |
112 | 0 | vx = _mm_add_epi32(vx, vr); |
113 | 0 | vx = _mm_srli_epi32(vx, 8); |
114 | 0 | } |
115 | 0 | vx = _mm_srli_epi32(vx, 14); |
116 | 0 | vx = _mm_packus_epi32(vx, vx); |
117 | 0 | vx = _mm_packus_epi16(vx, vx); |
118 | 0 | *dptr = _mm_cvtsi128_si32(vx); |
119 | 0 | if (RGB) |
120 | 0 | *dptr |= 0xff000000; |
121 | 0 | dptr++; |
122 | 0 | } |
123 | 0 | } |
124 | 0 | }; Unexecuted instantiation: qt_qimageScaleAARGBA_down_x_up_y_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}::operator()(int, int) constUnexecuted instantiation: qt_qimageScaleAARGBA_down_x_up_y_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}::operator()(int, int) const |
125 | 0 | multithread_pixels_function(isi, dh, scaleSection); |
126 | 0 | } Unexecuted instantiation: void qt_qimageScaleAARGBA_down_x_up_y_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int) Unexecuted instantiation: void qt_qimageScaleAARGBA_down_x_up_y_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int) |
127 | | |
128 | | template<bool RGB> |
129 | | void qt_qimageScaleAARGBA_down_xy_sse4(QImageScaleInfo *isi, unsigned int *dest, |
130 | | int dw, int dh, int dow, int sow) |
131 | 0 | { |
132 | 0 | const unsigned int **ypoints = isi->ypoints; |
133 | 0 | int *xpoints = isi->xpoints; |
134 | 0 | int *xapoints = isi->xapoints; |
135 | 0 | int *yapoints = isi->yapoints; |
136 | |
|
137 | 0 | auto scaleSection = [&] (int yStart, int yEnd) { |
138 | 0 | for (int y = yStart; y < yEnd; ++y) { |
139 | 0 | int Cy = yapoints[y] >> 16; |
140 | 0 | int yap = yapoints[y] & 0xffff; |
141 | 0 | const __m128i vCy = _mm_set1_epi32(Cy); |
142 | 0 | const __m128i vyap = _mm_set1_epi32(yap); |
143 | |
|
144 | 0 | unsigned int *dptr = dest + (y * dow); |
145 | 0 | for (int x = 0; x < dw; x++) { |
146 | 0 | const int Cx = xapoints[x] >> 16; |
147 | 0 | const int xap = xapoints[x] & 0xffff; |
148 | 0 | const __m128i vCx = _mm_set1_epi32(Cx); |
149 | 0 | const __m128i vxap = _mm_set1_epi32(xap); |
150 | |
|
151 | 0 | const unsigned int *sptr = ypoints[y] + xpoints[x]; |
152 | 0 | __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx); |
153 | 0 | __m128i vr = _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vyap); |
154 | |
|
155 | 0 | int j; |
156 | 0 | for (j = (1 << 14) - yap; j > Cy; j -= Cy) { |
157 | 0 | sptr += sow; |
158 | 0 | vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx); |
159 | 0 | vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vCy)); |
160 | 0 | } |
161 | 0 | sptr += sow; |
162 | 0 | vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx); |
163 | 0 | vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), _mm_set1_epi32(j))); |
164 | |
|
165 | 0 | vr = _mm_srli_epi32(vr, 24); |
166 | 0 | vr = _mm_packus_epi32(vr, _mm_setzero_si128()); |
167 | 0 | vr = _mm_packus_epi16(vr, _mm_setzero_si128()); |
168 | 0 | *dptr = _mm_cvtsi128_si32(vr); |
169 | 0 | if (RGB) |
170 | 0 | *dptr |= 0xff000000; |
171 | 0 | dptr++; |
172 | 0 | } |
173 | 0 | } |
174 | 0 | }; Unexecuted instantiation: qt_qimageScaleAARGBA_down_xy_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}::operator()(int, int) constUnexecuted instantiation: qt_qimageScaleAARGBA_down_xy_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}::operator()(int, int) const |
175 | 0 | multithread_pixels_function(isi, dh, scaleSection); |
176 | 0 | } Unexecuted instantiation: void qt_qimageScaleAARGBA_down_xy_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int) Unexecuted instantiation: void qt_qimageScaleAARGBA_down_xy_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int) |
177 | | |
178 | | template void qt_qimageScaleAARGBA_up_x_down_y_sse4<false>(QImageScaleInfo *isi, unsigned int *dest, |
179 | | int dw, int dh, int dow, int sow); |
180 | | |
181 | | template void qt_qimageScaleAARGBA_up_x_down_y_sse4<true>(QImageScaleInfo *isi, unsigned int *dest, |
182 | | int dw, int dh, int dow, int sow); |
183 | | |
184 | | template void qt_qimageScaleAARGBA_down_x_up_y_sse4<false>(QImageScaleInfo *isi, unsigned int *dest, |
185 | | int dw, int dh, int dow, int sow); |
186 | | |
187 | | template void qt_qimageScaleAARGBA_down_x_up_y_sse4<true>(QImageScaleInfo *isi, unsigned int *dest, |
188 | | int dw, int dh, int dow, int sow); |
189 | | |
190 | | template void qt_qimageScaleAARGBA_down_xy_sse4<false>(QImageScaleInfo *isi, unsigned int *dest, |
191 | | int dw, int dh, int dow, int sow); |
192 | | |
193 | | template void qt_qimageScaleAARGBA_down_xy_sse4<true>(QImageScaleInfo *isi, unsigned int *dest, |
194 | | int dw, int dh, int dow, int sow); |
195 | | |
196 | | QT_END_NAMESPACE |
197 | | |
198 | | #endif |