/src/qtbase/src/gui/painting/qimagescale_sse4.cpp
Line | Count | Source |
1 | | // Copyright (C) 2016 The Qt Company Ltd. |
2 | | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
3 | | |
4 | | #include "qimagescale_p.h" |
5 | | #include "qimage.h" |
6 | | #include <private/qdrawhelper_x86_p.h> |
7 | | #include <private/qsimd_p.h> |
8 | | |
9 | | #if QT_CONFIG(qtgui_threadpool) |
10 | | #include <private/qlatch_p.h> |
11 | | #include <qthreadpool.h> |
12 | | #include <private/qguiapplication_p.h> |
13 | | #include <private/qthreadpool_p.h> |
14 | | #endif |
15 | | |
16 | | #if defined(QT_COMPILER_SUPPORTS_SSE4_1) |
17 | | |
18 | | QT_BEGIN_NAMESPACE |
19 | | |
20 | | using namespace QImageScale; |
21 | | |
22 | | template<typename T> |
23 | | static inline void multithread_pixels_function(QImageScaleInfo *isi, int dh, const T &scaleSection) |
24 | 1.40k | { |
25 | 1.40k | #if QT_CONFIG(qtgui_threadpool) |
26 | 1.40k | int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16); |
27 | 1.40k | segments = std::min(segments, dh); |
28 | 1.40k | QThreadPool *threadPool = QGuiApplicationPrivate::qtGuiThreadPool(); |
29 | 1.40k | if (segments > 1 && threadPool && !threadPool->contains(QThread::currentThread())) { |
30 | 504 | QLatch latch(segments); |
31 | 504 | int y = 0; |
32 | 10.8k | for (int i = 0; i < segments; ++i) { |
33 | 10.3k | int yn = (dh - y) / (segments - i); |
34 | 10.3k | threadPool->start([&, y, yn]() { |
35 | 10.3k | scaleSection(y, y + yn); |
36 | 10.3k | latch.countDown(); |
37 | 10.3k | }); Unexecuted instantiation: qimagescale_sse4.cpp:multithread_pixels_function<qt_qimageScaleAARGBA_up_x_down_y_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}>(QImageScale::QImageScaleInfo*, int, qt_qimageScaleAARGBA_up_x_down_y_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1} const&)::{lambda()#1}::operator()() constUnexecuted instantiation: qimagescale_sse4.cpp:multithread_pixels_function<qt_qimageScaleAARGBA_up_x_down_y_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}>(QImageScale::QImageScaleInfo*, int, qt_qimageScaleAARGBA_up_x_down_y_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1} const&)::{lambda()#1}::operator()() constUnexecuted instantiation: qimagescale_sse4.cpp:multithread_pixels_function<qt_qimageScaleAARGBA_down_x_up_y_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}>(QImageScale::QImageScaleInfo*, int, qt_qimageScaleAARGBA_down_x_up_y_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1} const&)::{lambda()#1}::operator()() constUnexecuted instantiation: qimagescale_sse4.cpp:multithread_pixels_function<qt_qimageScaleAARGBA_down_x_up_y_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}>(QImageScale::QImageScaleInfo*, int, qt_qimageScaleAARGBA_down_x_up_y_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1} const&)::{lambda()#1}::operator()() constUnexecuted instantiation: qimagescale_sse4.cpp:multithread_pixels_function<qt_qimageScaleAARGBA_down_xy_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}>(QImageScale::QImageScaleInfo*, int, qt_qimageScaleAARGBA_down_xy_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1} const&)::{lambda()#1}::operator()() constqimagescale_sse4.cpp:multithread_pixels_function<qt_qimageScaleAARGBA_down_xy_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}>(QImageScale::QImageScaleInfo*, int, qt_qimageScaleAARGBA_down_xy_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1} const&)::{lambda()#1}::operator()() constLine | Count | Source | 34 | 10.3k | threadPool->start([&, y, yn]() { | 35 | 10.3k | scaleSection(y, y + yn); | 36 | 10.3k | latch.countDown(); | 37 | 10.3k | }); |
|
38 | 10.3k | y += yn; |
39 | 10.3k | } |
40 | 504 | latch.wait(); |
41 | 504 | return; |
42 | 504 | } |
43 | 905 | #endif |
44 | 905 | scaleSection(0, dh); |
45 | 905 | } Unexecuted instantiation: qimagescale_sse4.cpp:void multithread_pixels_function<qt_qimageScaleAARGBA_up_x_down_y_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}>(QImageScale::QImageScaleInfo*, int, qt_qimageScaleAARGBA_up_x_down_y_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1} const&)qimagescale_sse4.cpp:void multithread_pixels_function<qt_qimageScaleAARGBA_up_x_down_y_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}>(QImageScale::QImageScaleInfo*, int, qt_qimageScaleAARGBA_up_x_down_y_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1} const&)Line | Count | Source | 24 | 3 | { | 25 | 3 | #if QT_CONFIG(qtgui_threadpool) | 26 | 3 | int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16); | 27 | 3 | segments = std::min(segments, dh); | 28 | 3 | QThreadPool *threadPool = QGuiApplicationPrivate::qtGuiThreadPool(); | 29 | 3 | if (segments > 1 && threadPool && !threadPool->contains(QThread::currentThread())) { | 30 | 0 | QLatch latch(segments); | 31 | 0 | int y = 0; | 32 | 0 | for (int i = 0; i < segments; ++i) { | 33 | 0 | int yn = (dh - y) / (segments - i); | 34 | 0 | threadPool->start([&, y, yn]() { | 35 | 0 | scaleSection(y, y + yn); | 36 | 0 | latch.countDown(); | 37 | 0 | }); | 38 | 0 | y += yn; | 39 | 0 | } | 40 | 0 | latch.wait(); | 41 | 0 | return; | 42 | 0 | } | 43 | 3 | #endif | 44 | 3 | scaleSection(0, dh); | 45 | 3 | } |
Unexecuted instantiation: qimagescale_sse4.cpp:void multithread_pixels_function<qt_qimageScaleAARGBA_down_x_up_y_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}>(QImageScale::QImageScaleInfo*, int, qt_qimageScaleAARGBA_down_x_up_y_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1} const&)qimagescale_sse4.cpp:void multithread_pixels_function<qt_qimageScaleAARGBA_down_x_up_y_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}>(QImageScale::QImageScaleInfo*, int, qt_qimageScaleAARGBA_down_x_up_y_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1} const&)Line | Count | Source | 24 | 6 | { | 25 | 6 | #if QT_CONFIG(qtgui_threadpool) | 26 | 6 | int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16); | 27 | 6 | segments = std::min(segments, dh); | 28 | 6 | QThreadPool *threadPool = QGuiApplicationPrivate::qtGuiThreadPool(); | 29 | 6 | if (segments > 1 && threadPool && !threadPool->contains(QThread::currentThread())) { | 30 | 0 | QLatch latch(segments); | 31 | 0 | int y = 0; | 32 | 0 | for (int i = 0; i < segments; ++i) { | 33 | 0 | int yn = (dh - y) / (segments - i); | 34 | 0 | threadPool->start([&, y, yn]() { | 35 | 0 | scaleSection(y, y + yn); | 36 | 0 | latch.countDown(); | 37 | 0 | }); | 38 | 0 | y += yn; | 39 | 0 | } | 40 | 0 | latch.wait(); | 41 | 0 | return; | 42 | 0 | } | 43 | 6 | #endif | 44 | 6 | scaleSection(0, dh); | 45 | 6 | } |
Unexecuted instantiation: qimagescale_sse4.cpp:void multithread_pixels_function<qt_qimageScaleAARGBA_down_xy_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}>(QImageScale::QImageScaleInfo*, int, qt_qimageScaleAARGBA_down_xy_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1} const&)qimagescale_sse4.cpp:void multithread_pixels_function<qt_qimageScaleAARGBA_down_xy_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}>(QImageScale::QImageScaleInfo*, int, qt_qimageScaleAARGBA_down_xy_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1} const&)Line | Count | Source | 24 | 1.40k | { | 25 | 1.40k | #if QT_CONFIG(qtgui_threadpool) | 26 | 1.40k | int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16); | 27 | 1.40k | segments = std::min(segments, dh); | 28 | 1.40k | QThreadPool *threadPool = QGuiApplicationPrivate::qtGuiThreadPool(); | 29 | 1.40k | if (segments > 1 && threadPool && !threadPool->contains(QThread::currentThread())) { | 30 | 504 | QLatch latch(segments); | 31 | 504 | int y = 0; | 32 | 10.8k | for (int i = 0; i < segments; ++i) { | 33 | 10.3k | int yn = (dh - y) / (segments - i); | 34 | 10.3k | threadPool->start([&, y, yn]() { | 35 | 10.3k | scaleSection(y, y + yn); | 36 | 10.3k | latch.countDown(); | 37 | 10.3k | }); | 38 | 10.3k | y += yn; | 39 | 10.3k | } | 40 | 504 | latch.wait(); | 41 | 504 | return; | 42 | 504 | } | 43 | 896 | #endif | 44 | 896 | scaleSection(0, dh); | 45 | 896 | } |
|
46 | | |
47 | | inline static __m128i Q_DECL_VECTORCALL |
48 | | qt_qimageScaleAARGBA_helper(const unsigned int *pix, int xyap, int Cxy, int step, const __m128i vxyap, const __m128i vCxy) |
49 | 18.6M | { |
50 | 18.6M | __m128i vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix)); |
51 | 18.6M | __m128i vx = _mm_mullo_epi32(vpix, vxyap); |
52 | 18.6M | int i; |
53 | 228M | for (i = (1 << 14) - xyap; i > Cxy; i -= Cxy) { |
54 | 210M | pix += step; |
55 | 210M | vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix)); |
56 | 210M | vx = _mm_add_epi32(vx, _mm_mullo_epi32(vpix, vCxy)); |
57 | 210M | } |
58 | 18.6M | pix += step; |
59 | 18.6M | vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix)); |
60 | 18.6M | vx = _mm_add_epi32(vx, _mm_mullo_epi32(vpix, _mm_set1_epi32(i))); |
61 | 18.6M | return vx; |
62 | 18.6M | } |
63 | | |
64 | | template<bool RGB> |
65 | | void qt_qimageScaleAARGBA_up_x_down_y_sse4(QImageScaleInfo *isi, unsigned int *dest, |
66 | | int dw, int dh, int dow, int sow) |
67 | 3 | { |
68 | 3 | const unsigned int **ypoints = isi->ypoints; |
69 | 3 | const int *xpoints = isi->xpoints; |
70 | 3 | const int *xapoints = isi->xapoints; |
71 | 3 | const int *yapoints = isi->yapoints; |
72 | | |
73 | 3 | const __m128i v256 = _mm_set1_epi32(256); |
74 | | |
75 | | /* go through every scanline in the output buffer */ |
76 | 3 | auto scaleSection = [&] (int yStart, int yEnd) { |
77 | 258 | for (int y = yStart; y < yEnd; ++y) { |
78 | 255 | const int Cy = yapoints[y] >> 16; |
79 | 255 | const int yap = yapoints[y] & 0xffff; |
80 | 255 | const __m128i vCy = _mm_set1_epi32(Cy); |
81 | 255 | const __m128i vyap = _mm_set1_epi32(yap); |
82 | | |
83 | 255 | unsigned int *dptr = dest + (y * dow); |
84 | 32.8k | for (int x = 0; x < dw; x++) { |
85 | 32.6k | const unsigned int *sptr = ypoints[y] + xpoints[x]; |
86 | 32.6k | __m128i vx = qt_qimageScaleAARGBA_helper(sptr, yap, Cy, sow, vyap, vCy); |
87 | | |
88 | 32.6k | const int xap = xapoints[x]; |
89 | 32.6k | if (xap > 0) { |
90 | 0 | const __m128i vxap = _mm_set1_epi32(xap); |
91 | 0 | const __m128i vinvxap = _mm_sub_epi32(v256, vxap); |
92 | 0 | __m128i vr = qt_qimageScaleAARGBA_helper(sptr + 1, yap, Cy, sow, vyap, vCy); |
93 | |
|
94 | 0 | vx = _mm_mullo_epi32(vx, vinvxap); |
95 | 0 | vr = _mm_mullo_epi32(vr, vxap); |
96 | 0 | vx = _mm_add_epi32(vx, vr); |
97 | 0 | vx = _mm_srli_epi32(vx, 8); |
98 | 0 | } |
99 | 32.6k | vx = _mm_srli_epi32(vx, 14); |
100 | 32.6k | vx = _mm_packus_epi32(vx, vx); |
101 | 32.6k | vx = _mm_packus_epi16(vx, vx); |
102 | 32.6k | *dptr = _mm_cvtsi128_si32(vx); |
103 | 32.6k | if (RGB) |
104 | 32.6k | *dptr |= 0xff000000; |
105 | 32.6k | dptr++; |
106 | 32.6k | } |
107 | 255 | } |
108 | 3 | }; Unexecuted instantiation: qt_qimageScaleAARGBA_up_x_down_y_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}::operator()(int, int) constqt_qimageScaleAARGBA_up_x_down_y_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}::operator()(int, int) constLine | Count | Source | 76 | 3 | auto scaleSection = [&] (int yStart, int yEnd) { | 77 | 258 | for (int y = yStart; y < yEnd; ++y) { | 78 | 255 | const int Cy = yapoints[y] >> 16; | 79 | 255 | const int yap = yapoints[y] & 0xffff; | 80 | 255 | const __m128i vCy = _mm_set1_epi32(Cy); | 81 | 255 | const __m128i vyap = _mm_set1_epi32(yap); | 82 | | | 83 | 255 | unsigned int *dptr = dest + (y * dow); | 84 | 32.8k | for (int x = 0; x < dw; x++) { | 85 | 32.6k | const unsigned int *sptr = ypoints[y] + xpoints[x]; | 86 | 32.6k | __m128i vx = qt_qimageScaleAARGBA_helper(sptr, yap, Cy, sow, vyap, vCy); | 87 | | | 88 | 32.6k | const int xap = xapoints[x]; | 89 | 32.6k | if (xap > 0) { | 90 | 0 | const __m128i vxap = _mm_set1_epi32(xap); | 91 | 0 | const __m128i vinvxap = _mm_sub_epi32(v256, vxap); | 92 | 0 | __m128i vr = qt_qimageScaleAARGBA_helper(sptr + 1, yap, Cy, sow, vyap, vCy); | 93 | |
| 94 | 0 | vx = _mm_mullo_epi32(vx, vinvxap); | 95 | 0 | vr = _mm_mullo_epi32(vr, vxap); | 96 | 0 | vx = _mm_add_epi32(vx, vr); | 97 | 0 | vx = _mm_srli_epi32(vx, 8); | 98 | 0 | } | 99 | 32.6k | vx = _mm_srli_epi32(vx, 14); | 100 | 32.6k | vx = _mm_packus_epi32(vx, vx); | 101 | 32.6k | vx = _mm_packus_epi16(vx, vx); | 102 | 32.6k | *dptr = _mm_cvtsi128_si32(vx); | 103 | 32.6k | if (RGB) | 104 | 32.6k | *dptr |= 0xff000000; | 105 | 32.6k | dptr++; | 106 | 32.6k | } | 107 | 255 | } | 108 | 3 | }; |
|
109 | 3 | multithread_pixels_function(isi, dh, scaleSection); |
110 | 3 | } Unexecuted instantiation: void qt_qimageScaleAARGBA_up_x_down_y_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int) void qt_qimageScaleAARGBA_up_x_down_y_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int) Line | Count | Source | 67 | 3 | { | 68 | 3 | const unsigned int **ypoints = isi->ypoints; | 69 | 3 | const int *xpoints = isi->xpoints; | 70 | 3 | const int *xapoints = isi->xapoints; | 71 | 3 | const int *yapoints = isi->yapoints; | 72 | | | 73 | 3 | const __m128i v256 = _mm_set1_epi32(256); | 74 | | | 75 | | /* go through every scanline in the output buffer */ | 76 | 3 | auto scaleSection = [&] (int yStart, int yEnd) { | 77 | 3 | for (int y = yStart; y < yEnd; ++y) { | 78 | 3 | const int Cy = yapoints[y] >> 16; | 79 | 3 | const int yap = yapoints[y] & 0xffff; | 80 | 3 | const __m128i vCy = _mm_set1_epi32(Cy); | 81 | 3 | const __m128i vyap = _mm_set1_epi32(yap); | 82 | | | 83 | 3 | unsigned int *dptr = dest + (y * dow); | 84 | 3 | for (int x = 0; x < dw; x++) { | 85 | 3 | const unsigned int *sptr = ypoints[y] + xpoints[x]; | 86 | 3 | __m128i vx = qt_qimageScaleAARGBA_helper(sptr, yap, Cy, sow, vyap, vCy); | 87 | | | 88 | 3 | const int xap = xapoints[x]; | 89 | 3 | if (xap > 0) { | 90 | 3 | const __m128i vxap = _mm_set1_epi32(xap); | 91 | 3 | const __m128i vinvxap = _mm_sub_epi32(v256, vxap); | 92 | 3 | __m128i vr = qt_qimageScaleAARGBA_helper(sptr + 1, yap, Cy, sow, vyap, vCy); | 93 | | | 94 | 3 | vx = _mm_mullo_epi32(vx, vinvxap); | 95 | 3 | vr = _mm_mullo_epi32(vr, vxap); | 96 | 3 | vx = _mm_add_epi32(vx, vr); | 97 | 3 | vx = _mm_srli_epi32(vx, 8); | 98 | 3 | } | 99 | 3 | vx = _mm_srli_epi32(vx, 14); | 100 | 3 | vx = _mm_packus_epi32(vx, vx); | 101 | 3 | vx = _mm_packus_epi16(vx, vx); | 102 | 3 | *dptr = _mm_cvtsi128_si32(vx); | 103 | 3 | if (RGB) | 104 | 3 | *dptr |= 0xff000000; | 105 | 3 | dptr++; | 106 | 3 | } | 107 | 3 | } | 108 | 3 | }; | 109 | 3 | multithread_pixels_function(isi, dh, scaleSection); | 110 | 3 | } |
|
111 | | |
112 | | template<bool RGB> |
113 | | void qt_qimageScaleAARGBA_down_x_up_y_sse4(QImageScaleInfo *isi, unsigned int *dest, |
114 | | int dw, int dh, int dow, int sow) |
115 | 6 | { |
116 | 6 | const unsigned int **ypoints = isi->ypoints; |
117 | 6 | int *xpoints = isi->xpoints; |
118 | 6 | int *xapoints = isi->xapoints; |
119 | 6 | int *yapoints = isi->yapoints; |
120 | | |
121 | 6 | const __m128i v256 = _mm_set1_epi32(256); |
122 | | |
123 | | /* go through every scanline in the output buffer */ |
124 | 6 | auto scaleSection = [&] (int yStart, int yEnd) { |
125 | 520 | for (int y = yStart; y < yEnd; ++y) { |
126 | 514 | unsigned int *dptr = dest + (y * dow); |
127 | 20.0k | for (int x = 0; x < dw; x++) { |
128 | 19.5k | int Cx = xapoints[x] >> 16; |
129 | 19.5k | int xap = xapoints[x] & 0xffff; |
130 | 19.5k | const __m128i vCx = _mm_set1_epi32(Cx); |
131 | 19.5k | const __m128i vxap = _mm_set1_epi32(xap); |
132 | | |
133 | 19.5k | const unsigned int *sptr = ypoints[y] + xpoints[x]; |
134 | 19.5k | __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx); |
135 | | |
136 | 19.5k | int yap = yapoints[y]; |
137 | 19.5k | if (yap > 0) { |
138 | 0 | const __m128i vyap = _mm_set1_epi32(yap); |
139 | 0 | const __m128i vinvyap = _mm_sub_epi32(v256, vyap); |
140 | 0 | __m128i vr = qt_qimageScaleAARGBA_helper(sptr + sow, xap, Cx, 1, vxap, vCx); |
141 | |
|
142 | 0 | vx = _mm_mullo_epi32(vx, vinvyap); |
143 | 0 | vr = _mm_mullo_epi32(vr, vyap); |
144 | 0 | vx = _mm_add_epi32(vx, vr); |
145 | 0 | vx = _mm_srli_epi32(vx, 8); |
146 | 0 | } |
147 | 19.5k | vx = _mm_srli_epi32(vx, 14); |
148 | 19.5k | vx = _mm_packus_epi32(vx, vx); |
149 | 19.5k | vx = _mm_packus_epi16(vx, vx); |
150 | 19.5k | *dptr = _mm_cvtsi128_si32(vx); |
151 | 19.5k | if (RGB) |
152 | 19.5k | *dptr |= 0xff000000; |
153 | 19.5k | dptr++; |
154 | 19.5k | } |
155 | 514 | } |
156 | 6 | }; Unexecuted instantiation: qt_qimageScaleAARGBA_down_x_up_y_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}::operator()(int, int) constqt_qimageScaleAARGBA_down_x_up_y_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}::operator()(int, int) constLine | Count | Source | 124 | 6 | auto scaleSection = [&] (int yStart, int yEnd) { | 125 | 520 | for (int y = yStart; y < yEnd; ++y) { | 126 | 514 | unsigned int *dptr = dest + (y * dow); | 127 | 20.0k | for (int x = 0; x < dw; x++) { | 128 | 19.5k | int Cx = xapoints[x] >> 16; | 129 | 19.5k | int xap = xapoints[x] & 0xffff; | 130 | 19.5k | const __m128i vCx = _mm_set1_epi32(Cx); | 131 | 19.5k | const __m128i vxap = _mm_set1_epi32(xap); | 132 | | | 133 | 19.5k | const unsigned int *sptr = ypoints[y] + xpoints[x]; | 134 | 19.5k | __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx); | 135 | | | 136 | 19.5k | int yap = yapoints[y]; | 137 | 19.5k | if (yap > 0) { | 138 | 0 | const __m128i vyap = _mm_set1_epi32(yap); | 139 | 0 | const __m128i vinvyap = _mm_sub_epi32(v256, vyap); | 140 | 0 | __m128i vr = qt_qimageScaleAARGBA_helper(sptr + sow, xap, Cx, 1, vxap, vCx); | 141 | |
| 142 | 0 | vx = _mm_mullo_epi32(vx, vinvyap); | 143 | 0 | vr = _mm_mullo_epi32(vr, vyap); | 144 | 0 | vx = _mm_add_epi32(vx, vr); | 145 | 0 | vx = _mm_srli_epi32(vx, 8); | 146 | 0 | } | 147 | 19.5k | vx = _mm_srli_epi32(vx, 14); | 148 | 19.5k | vx = _mm_packus_epi32(vx, vx); | 149 | 19.5k | vx = _mm_packus_epi16(vx, vx); | 150 | 19.5k | *dptr = _mm_cvtsi128_si32(vx); | 151 | 19.5k | if (RGB) | 152 | 19.5k | *dptr |= 0xff000000; | 153 | 19.5k | dptr++; | 154 | 19.5k | } | 155 | 514 | } | 156 | 6 | }; |
|
157 | 6 | multithread_pixels_function(isi, dh, scaleSection); |
158 | 6 | } Unexecuted instantiation: void qt_qimageScaleAARGBA_down_x_up_y_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int) void qt_qimageScaleAARGBA_down_x_up_y_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int) Line | Count | Source | 115 | 6 | { | 116 | 6 | const unsigned int **ypoints = isi->ypoints; | 117 | 6 | int *xpoints = isi->xpoints; | 118 | 6 | int *xapoints = isi->xapoints; | 119 | 6 | int *yapoints = isi->yapoints; | 120 | | | 121 | 6 | const __m128i v256 = _mm_set1_epi32(256); | 122 | | | 123 | | /* go through every scanline in the output buffer */ | 124 | 6 | auto scaleSection = [&] (int yStart, int yEnd) { | 125 | 6 | for (int y = yStart; y < yEnd; ++y) { | 126 | 6 | unsigned int *dptr = dest + (y * dow); | 127 | 6 | for (int x = 0; x < dw; x++) { | 128 | 6 | int Cx = xapoints[x] >> 16; | 129 | 6 | int xap = xapoints[x] & 0xffff; | 130 | 6 | const __m128i vCx = _mm_set1_epi32(Cx); | 131 | 6 | const __m128i vxap = _mm_set1_epi32(xap); | 132 | | | 133 | 6 | const unsigned int *sptr = ypoints[y] + xpoints[x]; | 134 | 6 | __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx); | 135 | | | 136 | 6 | int yap = yapoints[y]; | 137 | 6 | if (yap > 0) { | 138 | 6 | const __m128i vyap = _mm_set1_epi32(yap); | 139 | 6 | const __m128i vinvyap = _mm_sub_epi32(v256, vyap); | 140 | 6 | __m128i vr = qt_qimageScaleAARGBA_helper(sptr + sow, xap, Cx, 1, vxap, vCx); | 141 | | | 142 | 6 | vx = _mm_mullo_epi32(vx, vinvyap); | 143 | 6 | vr = _mm_mullo_epi32(vr, vyap); | 144 | 6 | vx = _mm_add_epi32(vx, vr); | 145 | 6 | vx = _mm_srli_epi32(vx, 8); | 146 | 6 | } | 147 | 6 | vx = _mm_srli_epi32(vx, 14); | 148 | 6 | vx = _mm_packus_epi32(vx, vx); | 149 | 6 | vx = _mm_packus_epi16(vx, vx); | 150 | 6 | *dptr = _mm_cvtsi128_si32(vx); | 151 | 6 | if (RGB) | 152 | 6 | *dptr |= 0xff000000; | 153 | 6 | dptr++; | 154 | 6 | } | 155 | 6 | } | 156 | 6 | }; | 157 | 6 | multithread_pixels_function(isi, dh, scaleSection); | 158 | 6 | } |
|
159 | | |
160 | | template<bool RGB> |
161 | | void qt_qimageScaleAARGBA_down_xy_sse4(QImageScaleInfo *isi, unsigned int *dest, |
162 | | int dw, int dh, int dow, int sow) |
163 | 1.40k | { |
164 | 1.40k | const unsigned int **ypoints = isi->ypoints; |
165 | 1.40k | int *xpoints = isi->xpoints; |
166 | 1.40k | int *xapoints = isi->xapoints; |
167 | 1.40k | int *yapoints = isi->yapoints; |
168 | | |
169 | 11.2k | auto scaleSection = [&] (int yStart, int yEnd) { |
170 | 102k | for (int y = yStart; y < yEnd; ++y) { |
171 | 90.9k | int Cy = yapoints[y] >> 16; |
172 | 90.9k | int yap = yapoints[y] & 0xffff; |
173 | 90.9k | const __m128i vCy = _mm_set1_epi32(Cy); |
174 | 90.9k | const __m128i vyap = _mm_set1_epi32(yap); |
175 | | |
176 | 90.9k | unsigned int *dptr = dest + (y * dow); |
177 | 5.24M | for (int x = 0; x < dw; x++) { |
178 | 5.15M | const int Cx = xapoints[x] >> 16; |
179 | 5.15M | const int xap = xapoints[x] & 0xffff; |
180 | 5.15M | const __m128i vCx = _mm_set1_epi32(Cx); |
181 | 5.15M | const __m128i vxap = _mm_set1_epi32(xap); |
182 | | |
183 | 5.15M | const unsigned int *sptr = ypoints[y] + xpoints[x]; |
184 | 5.15M | __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx); |
185 | 5.15M | __m128i vr = _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vyap); |
186 | | |
187 | 5.15M | int j; |
188 | 15.7M | for (j = (1 << 14) - yap; j > Cy; j -= Cy) { |
189 | 10.6M | sptr += sow; |
190 | 10.6M | vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx); |
191 | 10.6M | vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vCy)); |
192 | 10.6M | } |
193 | 5.15M | sptr += sow; |
194 | 5.15M | vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx); |
195 | 5.15M | vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), _mm_set1_epi32(j))); |
196 | | |
197 | 5.15M | vr = _mm_srli_epi32(vr, 24); |
198 | 5.15M | vr = _mm_packus_epi32(vr, _mm_setzero_si128()); |
199 | 5.15M | vr = _mm_packus_epi16(vr, _mm_setzero_si128()); |
200 | 5.15M | *dptr = _mm_cvtsi128_si32(vr); |
201 | 5.15M | if (RGB) |
202 | 5.13M | *dptr |= 0xff000000; |
203 | 5.15M | dptr++; |
204 | 5.15M | } |
205 | 90.9k | } |
206 | 11.2k | }; Unexecuted instantiation: qt_qimageScaleAARGBA_down_xy_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}::operator()(int, int) constqt_qimageScaleAARGBA_down_xy_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}::operator()(int, int) constLine | Count | Source | 169 | 11.2k | auto scaleSection = [&] (int yStart, int yEnd) { | 170 | 102k | for (int y = yStart; y < yEnd; ++y) { | 171 | 90.9k | int Cy = yapoints[y] >> 16; | 172 | 90.9k | int yap = yapoints[y] & 0xffff; | 173 | 90.9k | const __m128i vCy = _mm_set1_epi32(Cy); | 174 | 90.9k | const __m128i vyap = _mm_set1_epi32(yap); | 175 | | | 176 | 90.9k | unsigned int *dptr = dest + (y * dow); | 177 | 5.24M | for (int x = 0; x < dw; x++) { | 178 | 5.15M | const int Cx = xapoints[x] >> 16; | 179 | 5.15M | const int xap = xapoints[x] & 0xffff; | 180 | 5.15M | const __m128i vCx = _mm_set1_epi32(Cx); | 181 | 5.15M | const __m128i vxap = _mm_set1_epi32(xap); | 182 | | | 183 | 5.15M | const unsigned int *sptr = ypoints[y] + xpoints[x]; | 184 | 5.15M | __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx); | 185 | 5.15M | __m128i vr = _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vyap); | 186 | | | 187 | 5.15M | int j; | 188 | 15.7M | for (j = (1 << 14) - yap; j > Cy; j -= Cy) { | 189 | 10.6M | sptr += sow; | 190 | 10.6M | vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx); | 191 | 10.6M | vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vCy)); | 192 | 10.6M | } | 193 | 5.15M | sptr += sow; | 194 | 5.15M | vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx); | 195 | 5.15M | vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), _mm_set1_epi32(j))); | 196 | | | 197 | 5.15M | vr = _mm_srli_epi32(vr, 24); | 198 | 5.15M | vr = _mm_packus_epi32(vr, _mm_setzero_si128()); | 199 | 5.15M | vr = _mm_packus_epi16(vr, _mm_setzero_si128()); | 200 | 5.15M | *dptr = _mm_cvtsi128_si32(vr); | 201 | 5.15M | if (RGB) | 202 | 5.13M | *dptr |= 0xff000000; | 203 | 5.15M | dptr++; | 204 | 5.15M | } | 205 | 90.9k | } | 206 | 11.2k | }; |
|
207 | 1.40k | multithread_pixels_function(isi, dh, scaleSection); |
208 | 1.40k | } Unexecuted instantiation: void qt_qimageScaleAARGBA_down_xy_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int) void qt_qimageScaleAARGBA_down_xy_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int) Line | Count | Source | 163 | 1.40k | { | 164 | 1.40k | const unsigned int **ypoints = isi->ypoints; | 165 | 1.40k | int *xpoints = isi->xpoints; | 166 | 1.40k | int *xapoints = isi->xapoints; | 167 | 1.40k | int *yapoints = isi->yapoints; | 168 | | | 169 | 1.40k | auto scaleSection = [&] (int yStart, int yEnd) { | 170 | 1.40k | for (int y = yStart; y < yEnd; ++y) { | 171 | 1.40k | int Cy = yapoints[y] >> 16; | 172 | 1.40k | int yap = yapoints[y] & 0xffff; | 173 | 1.40k | const __m128i vCy = _mm_set1_epi32(Cy); | 174 | 1.40k | const __m128i vyap = _mm_set1_epi32(yap); | 175 | | | 176 | 1.40k | unsigned int *dptr = dest + (y * dow); | 177 | 1.40k | for (int x = 0; x < dw; x++) { | 178 | 1.40k | const int Cx = xapoints[x] >> 16; | 179 | 1.40k | const int xap = xapoints[x] & 0xffff; | 180 | 1.40k | const __m128i vCx = _mm_set1_epi32(Cx); | 181 | 1.40k | const __m128i vxap = _mm_set1_epi32(xap); | 182 | | | 183 | 1.40k | const unsigned int *sptr = ypoints[y] + xpoints[x]; | 184 | 1.40k | __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx); | 185 | 1.40k | __m128i vr = _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vyap); | 186 | | | 187 | 1.40k | int j; | 188 | 1.40k | for (j = (1 << 14) - yap; j > Cy; j -= Cy) { | 189 | 1.40k | sptr += sow; | 190 | 1.40k | vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx); | 191 | 1.40k | vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vCy)); | 192 | 1.40k | } | 193 | 1.40k | sptr += sow; | 194 | 1.40k | vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx); | 195 | 1.40k | vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), _mm_set1_epi32(j))); | 196 | | | 197 | 1.40k | vr = _mm_srli_epi32(vr, 24); | 198 | 1.40k | vr = _mm_packus_epi32(vr, _mm_setzero_si128()); | 199 | 1.40k | vr = _mm_packus_epi16(vr, _mm_setzero_si128()); | 200 | 1.40k | *dptr = _mm_cvtsi128_si32(vr); | 201 | 1.40k | if (RGB) | 202 | 1.40k | *dptr |= 0xff000000; | 203 | 1.40k | dptr++; | 204 | 1.40k | } | 205 | 1.40k | } | 206 | 1.40k | }; | 207 | 1.40k | multithread_pixels_function(isi, dh, scaleSection); | 208 | 1.40k | } |
|
209 | | |
210 | | template void qt_qimageScaleAARGBA_up_x_down_y_sse4<false>(QImageScaleInfo *isi, unsigned int *dest, |
211 | | int dw, int dh, int dow, int sow); |
212 | | |
213 | | template void qt_qimageScaleAARGBA_up_x_down_y_sse4<true>(QImageScaleInfo *isi, unsigned int *dest, |
214 | | int dw, int dh, int dow, int sow); |
215 | | |
216 | | template void qt_qimageScaleAARGBA_down_x_up_y_sse4<false>(QImageScaleInfo *isi, unsigned int *dest, |
217 | | int dw, int dh, int dow, int sow); |
218 | | |
219 | | template void qt_qimageScaleAARGBA_down_x_up_y_sse4<true>(QImageScaleInfo *isi, unsigned int *dest, |
220 | | int dw, int dh, int dow, int sow); |
221 | | |
222 | | template void qt_qimageScaleAARGBA_down_xy_sse4<false>(QImageScaleInfo *isi, unsigned int *dest, |
223 | | int dw, int dh, int dow, int sow); |
224 | | |
225 | | template void qt_qimageScaleAARGBA_down_xy_sse4<true>(QImageScaleInfo *isi, unsigned int *dest, |
226 | | int dw, int dh, int dow, int sow); |
227 | | |
228 | | QT_END_NAMESPACE |
229 | | |
230 | | #endif |