Coverage Report

Created: 2026-01-25 07:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/qtbase/src/gui/painting/qimagescale_sse4.cpp
Line
Count
Source
1
// Copyright (C) 2016 The Qt Company Ltd.
2
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
4
#include "qimagescale_p.h"
5
#include "qimage.h"
6
#include <private/qdrawhelper_x86_p.h>
7
#include <private/qsimd_p.h>
8
9
#if QT_CONFIG(qtgui_threadpool)
10
#include <private/qlatch_p.h>
11
#include <qthreadpool.h>
12
#include <private/qguiapplication_p.h>
13
#include <private/qthreadpool_p.h>
14
#endif
15
16
#if defined(QT_COMPILER_SUPPORTS_SSE4_1)
17
18
QT_BEGIN_NAMESPACE
19
20
using namespace QImageScale;
21
22
template<typename T>
23
static inline void multithread_pixels_function(QImageScaleInfo *isi, int dh, const T &scaleSection)
24
1.40k
{
25
1.40k
#if QT_CONFIG(qtgui_threadpool)
26
1.40k
    int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
27
1.40k
    segments = std::min(segments, dh);
28
1.40k
    QThreadPool *threadPool = QGuiApplicationPrivate::qtGuiThreadPool();
29
1.40k
    if (segments > 1 && threadPool && !threadPool->contains(QThread::currentThread())) {
30
504
        QLatch latch(segments);
31
504
        int y = 0;
32
10.8k
        for (int i = 0; i < segments; ++i) {
33
10.3k
            int yn = (dh - y) / (segments - i);
34
10.3k
            threadPool->start([&, y, yn]() {
35
10.3k
                scaleSection(y, y + yn);
36
10.3k
                latch.countDown();
37
10.3k
            });
Unexecuted instantiation: qimagescale_sse4.cpp:multithread_pixels_function<qt_qimageScaleAARGBA_up_x_down_y_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}>(QImageScale::QImageScaleInfo*, int, qt_qimageScaleAARGBA_up_x_down_y_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1} const&)::{lambda()#1}::operator()() const
Unexecuted instantiation: qimagescale_sse4.cpp:multithread_pixels_function<qt_qimageScaleAARGBA_up_x_down_y_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}>(QImageScale::QImageScaleInfo*, int, qt_qimageScaleAARGBA_up_x_down_y_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1} const&)::{lambda()#1}::operator()() const
Unexecuted instantiation: qimagescale_sse4.cpp:multithread_pixels_function<qt_qimageScaleAARGBA_down_x_up_y_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}>(QImageScale::QImageScaleInfo*, int, qt_qimageScaleAARGBA_down_x_up_y_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1} const&)::{lambda()#1}::operator()() const
Unexecuted instantiation: qimagescale_sse4.cpp:multithread_pixels_function<qt_qimageScaleAARGBA_down_x_up_y_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}>(QImageScale::QImageScaleInfo*, int, qt_qimageScaleAARGBA_down_x_up_y_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1} const&)::{lambda()#1}::operator()() const
Unexecuted instantiation: qimagescale_sse4.cpp:multithread_pixels_function<qt_qimageScaleAARGBA_down_xy_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}>(QImageScale::QImageScaleInfo*, int, qt_qimageScaleAARGBA_down_xy_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1} const&)::{lambda()#1}::operator()() const
qimagescale_sse4.cpp:multithread_pixels_function<qt_qimageScaleAARGBA_down_xy_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}>(QImageScale::QImageScaleInfo*, int, qt_qimageScaleAARGBA_down_xy_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1} const&)::{lambda()#1}::operator()() const
Line
Count
Source
34
10.3k
            threadPool->start([&, y, yn]() {
35
10.3k
                scaleSection(y, y + yn);
36
10.3k
                latch.countDown();
37
10.3k
            });
38
10.3k
            y += yn;
39
10.3k
        }
40
504
        latch.wait();
41
504
        return;
42
504
    }
43
905
#endif
44
905
    scaleSection(0, dh);
45
905
}
Unexecuted instantiation: qimagescale_sse4.cpp:void multithread_pixels_function<qt_qimageScaleAARGBA_up_x_down_y_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}>(QImageScale::QImageScaleInfo*, int, qt_qimageScaleAARGBA_up_x_down_y_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1} const&)
qimagescale_sse4.cpp:void multithread_pixels_function<qt_qimageScaleAARGBA_up_x_down_y_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}>(QImageScale::QImageScaleInfo*, int, qt_qimageScaleAARGBA_up_x_down_y_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1} const&)
Line
Count
Source
24
3
{
25
3
#if QT_CONFIG(qtgui_threadpool)
26
3
    int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
27
3
    segments = std::min(segments, dh);
28
3
    QThreadPool *threadPool = QGuiApplicationPrivate::qtGuiThreadPool();
29
3
    if (segments > 1 && threadPool && !threadPool->contains(QThread::currentThread())) {
30
0
        QLatch latch(segments);
31
0
        int y = 0;
32
0
        for (int i = 0; i < segments; ++i) {
33
0
            int yn = (dh - y) / (segments - i);
34
0
            threadPool->start([&, y, yn]() {
35
0
                scaleSection(y, y + yn);
36
0
                latch.countDown();
37
0
            });
38
0
            y += yn;
39
0
        }
40
0
        latch.wait();
41
0
        return;
42
0
    }
43
3
#endif
44
3
    scaleSection(0, dh);
45
3
}
Unexecuted instantiation: qimagescale_sse4.cpp:void multithread_pixels_function<qt_qimageScaleAARGBA_down_x_up_y_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}>(QImageScale::QImageScaleInfo*, int, qt_qimageScaleAARGBA_down_x_up_y_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1} const&)
qimagescale_sse4.cpp:void multithread_pixels_function<qt_qimageScaleAARGBA_down_x_up_y_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}>(QImageScale::QImageScaleInfo*, int, qt_qimageScaleAARGBA_down_x_up_y_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1} const&)
Line
Count
Source
24
6
{
25
6
#if QT_CONFIG(qtgui_threadpool)
26
6
    int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
27
6
    segments = std::min(segments, dh);
28
6
    QThreadPool *threadPool = QGuiApplicationPrivate::qtGuiThreadPool();
29
6
    if (segments > 1 && threadPool && !threadPool->contains(QThread::currentThread())) {
30
0
        QLatch latch(segments);
31
0
        int y = 0;
32
0
        for (int i = 0; i < segments; ++i) {
33
0
            int yn = (dh - y) / (segments - i);
34
0
            threadPool->start([&, y, yn]() {
35
0
                scaleSection(y, y + yn);
36
0
                latch.countDown();
37
0
            });
38
0
            y += yn;
39
0
        }
40
0
        latch.wait();
41
0
        return;
42
0
    }
43
6
#endif
44
6
    scaleSection(0, dh);
45
6
}
Unexecuted instantiation: qimagescale_sse4.cpp:void multithread_pixels_function<qt_qimageScaleAARGBA_down_xy_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}>(QImageScale::QImageScaleInfo*, int, qt_qimageScaleAARGBA_down_xy_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1} const&)
qimagescale_sse4.cpp:void multithread_pixels_function<qt_qimageScaleAARGBA_down_xy_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}>(QImageScale::QImageScaleInfo*, int, qt_qimageScaleAARGBA_down_xy_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1} const&)
Line
Count
Source
24
1.40k
{
25
1.40k
#if QT_CONFIG(qtgui_threadpool)
26
1.40k
    int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
27
1.40k
    segments = std::min(segments, dh);
28
1.40k
    QThreadPool *threadPool = QGuiApplicationPrivate::qtGuiThreadPool();
29
1.40k
    if (segments > 1 && threadPool && !threadPool->contains(QThread::currentThread())) {
30
504
        QLatch latch(segments);
31
504
        int y = 0;
32
10.8k
        for (int i = 0; i < segments; ++i) {
33
10.3k
            int yn = (dh - y) / (segments - i);
34
10.3k
            threadPool->start([&, y, yn]() {
35
10.3k
                scaleSection(y, y + yn);
36
10.3k
                latch.countDown();
37
10.3k
            });
38
10.3k
            y += yn;
39
10.3k
        }
40
504
        latch.wait();
41
504
        return;
42
504
    }
43
896
#endif
44
896
    scaleSection(0, dh);
45
896
}
46
47
inline static __m128i Q_DECL_VECTORCALL
48
qt_qimageScaleAARGBA_helper(const unsigned int *pix, int xyap, int Cxy, int step, const __m128i vxyap, const __m128i vCxy)
49
18.6M
{
50
18.6M
    __m128i vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix));
51
18.6M
    __m128i vx = _mm_mullo_epi32(vpix, vxyap);
52
18.6M
    int i;
53
228M
    for (i = (1 << 14) - xyap; i > Cxy; i -= Cxy) {
54
210M
        pix += step;
55
210M
        vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix));
56
210M
        vx = _mm_add_epi32(vx, _mm_mullo_epi32(vpix, vCxy));
57
210M
    }
58
18.6M
    pix += step;
59
18.6M
    vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix));
60
18.6M
    vx = _mm_add_epi32(vx, _mm_mullo_epi32(vpix, _mm_set1_epi32(i)));
61
18.6M
    return vx;
62
18.6M
}
63
64
template<bool RGB>
65
void qt_qimageScaleAARGBA_up_x_down_y_sse4(QImageScaleInfo *isi, unsigned int *dest,
66
                                           int dw, int dh, int dow, int sow)
67
3
{
68
3
    const unsigned int **ypoints = isi->ypoints;
69
3
    const int *xpoints = isi->xpoints;
70
3
    const int *xapoints = isi->xapoints;
71
3
    const int *yapoints = isi->yapoints;
72
73
3
    const __m128i v256 = _mm_set1_epi32(256);
74
75
    /* go through every scanline in the output buffer */
76
3
    auto scaleSection = [&] (int yStart, int yEnd) {
77
258
        for (int y = yStart; y < yEnd; ++y) {
78
255
            const int Cy = yapoints[y] >> 16;
79
255
            const int yap = yapoints[y] & 0xffff;
80
255
            const __m128i vCy = _mm_set1_epi32(Cy);
81
255
            const __m128i vyap = _mm_set1_epi32(yap);
82
83
255
            unsigned int *dptr = dest + (y * dow);
84
32.8k
            for (int x = 0; x < dw; x++) {
85
32.6k
                const unsigned int *sptr = ypoints[y] + xpoints[x];
86
32.6k
                __m128i vx = qt_qimageScaleAARGBA_helper(sptr, yap, Cy, sow, vyap, vCy);
87
88
32.6k
                const int xap = xapoints[x];
89
32.6k
                if (xap > 0) {
90
0
                    const __m128i vxap = _mm_set1_epi32(xap);
91
0
                    const __m128i vinvxap = _mm_sub_epi32(v256, vxap);
92
0
                    __m128i vr = qt_qimageScaleAARGBA_helper(sptr + 1, yap, Cy, sow, vyap, vCy);
93
94
0
                    vx = _mm_mullo_epi32(vx, vinvxap);
95
0
                    vr = _mm_mullo_epi32(vr, vxap);
96
0
                    vx = _mm_add_epi32(vx, vr);
97
0
                    vx = _mm_srli_epi32(vx, 8);
98
0
                }
99
32.6k
                vx = _mm_srli_epi32(vx, 14);
100
32.6k
                vx = _mm_packus_epi32(vx, vx);
101
32.6k
                vx = _mm_packus_epi16(vx, vx);
102
32.6k
                *dptr = _mm_cvtsi128_si32(vx);
103
32.6k
                if (RGB)
104
32.6k
                    *dptr |= 0xff000000;
105
32.6k
                dptr++;
106
32.6k
            }
107
255
        }
108
3
    };
Unexecuted instantiation: qt_qimageScaleAARGBA_up_x_down_y_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}::operator()(int, int) const
qt_qimageScaleAARGBA_up_x_down_y_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}::operator()(int, int) const
Line
Count
Source
76
3
    auto scaleSection = [&] (int yStart, int yEnd) {
77
258
        for (int y = yStart; y < yEnd; ++y) {
78
255
            const int Cy = yapoints[y] >> 16;
79
255
            const int yap = yapoints[y] & 0xffff;
80
255
            const __m128i vCy = _mm_set1_epi32(Cy);
81
255
            const __m128i vyap = _mm_set1_epi32(yap);
82
83
255
            unsigned int *dptr = dest + (y * dow);
84
32.8k
            for (int x = 0; x < dw; x++) {
85
32.6k
                const unsigned int *sptr = ypoints[y] + xpoints[x];
86
32.6k
                __m128i vx = qt_qimageScaleAARGBA_helper(sptr, yap, Cy, sow, vyap, vCy);
87
88
32.6k
                const int xap = xapoints[x];
89
32.6k
                if (xap > 0) {
90
0
                    const __m128i vxap = _mm_set1_epi32(xap);
91
0
                    const __m128i vinvxap = _mm_sub_epi32(v256, vxap);
92
0
                    __m128i vr = qt_qimageScaleAARGBA_helper(sptr + 1, yap, Cy, sow, vyap, vCy);
93
94
0
                    vx = _mm_mullo_epi32(vx, vinvxap);
95
0
                    vr = _mm_mullo_epi32(vr, vxap);
96
0
                    vx = _mm_add_epi32(vx, vr);
97
0
                    vx = _mm_srli_epi32(vx, 8);
98
0
                }
99
32.6k
                vx = _mm_srli_epi32(vx, 14);
100
32.6k
                vx = _mm_packus_epi32(vx, vx);
101
32.6k
                vx = _mm_packus_epi16(vx, vx);
102
32.6k
                *dptr = _mm_cvtsi128_si32(vx);
103
32.6k
                if (RGB)
104
32.6k
                    *dptr |= 0xff000000;
105
32.6k
                dptr++;
106
32.6k
            }
107
255
        }
108
3
    };
109
3
    multithread_pixels_function(isi, dh, scaleSection);
110
3
}
Unexecuted instantiation: void qt_qimageScaleAARGBA_up_x_down_y_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)
void qt_qimageScaleAARGBA_up_x_down_y_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)
Line
Count
Source
67
3
{
68
3
    const unsigned int **ypoints = isi->ypoints;
69
3
    const int *xpoints = isi->xpoints;
70
3
    const int *xapoints = isi->xapoints;
71
3
    const int *yapoints = isi->yapoints;
72
73
3
    const __m128i v256 = _mm_set1_epi32(256);
74
75
    /* go through every scanline in the output buffer */
76
3
    auto scaleSection = [&] (int yStart, int yEnd) {
77
3
        for (int y = yStart; y < yEnd; ++y) {
78
3
            const int Cy = yapoints[y] >> 16;
79
3
            const int yap = yapoints[y] & 0xffff;
80
3
            const __m128i vCy = _mm_set1_epi32(Cy);
81
3
            const __m128i vyap = _mm_set1_epi32(yap);
82
83
3
            unsigned int *dptr = dest + (y * dow);
84
3
            for (int x = 0; x < dw; x++) {
85
3
                const unsigned int *sptr = ypoints[y] + xpoints[x];
86
3
                __m128i vx = qt_qimageScaleAARGBA_helper(sptr, yap, Cy, sow, vyap, vCy);
87
88
3
                const int xap = xapoints[x];
89
3
                if (xap > 0) {
90
3
                    const __m128i vxap = _mm_set1_epi32(xap);
91
3
                    const __m128i vinvxap = _mm_sub_epi32(v256, vxap);
92
3
                    __m128i vr = qt_qimageScaleAARGBA_helper(sptr + 1, yap, Cy, sow, vyap, vCy);
93
94
3
                    vx = _mm_mullo_epi32(vx, vinvxap);
95
3
                    vr = _mm_mullo_epi32(vr, vxap);
96
3
                    vx = _mm_add_epi32(vx, vr);
97
3
                    vx = _mm_srli_epi32(vx, 8);
98
3
                }
99
3
                vx = _mm_srli_epi32(vx, 14);
100
3
                vx = _mm_packus_epi32(vx, vx);
101
3
                vx = _mm_packus_epi16(vx, vx);
102
3
                *dptr = _mm_cvtsi128_si32(vx);
103
3
                if (RGB)
104
3
                    *dptr |= 0xff000000;
105
3
                dptr++;
106
3
            }
107
3
        }
108
3
    };
109
3
    multithread_pixels_function(isi, dh, scaleSection);
110
3
}
111
112
template<bool RGB>
113
void qt_qimageScaleAARGBA_down_x_up_y_sse4(QImageScaleInfo *isi, unsigned int *dest,
114
                                           int dw, int dh, int dow, int sow)
115
6
{
116
6
    const unsigned int **ypoints = isi->ypoints;
117
6
    int *xpoints = isi->xpoints;
118
6
    int *xapoints = isi->xapoints;
119
6
    int *yapoints = isi->yapoints;
120
121
6
    const __m128i v256 = _mm_set1_epi32(256);
122
123
    /* go through every scanline in the output buffer */
124
6
    auto scaleSection = [&] (int yStart, int yEnd) {
125
520
        for (int y = yStart; y < yEnd; ++y) {
126
514
            unsigned int *dptr = dest + (y * dow);
127
20.0k
            for (int x = 0; x < dw; x++) {
128
19.5k
                int Cx = xapoints[x] >> 16;
129
19.5k
                int xap = xapoints[x] & 0xffff;
130
19.5k
                const __m128i vCx = _mm_set1_epi32(Cx);
131
19.5k
                const __m128i vxap = _mm_set1_epi32(xap);
132
133
19.5k
                const unsigned int *sptr = ypoints[y] + xpoints[x];
134
19.5k
                __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
135
136
19.5k
                int yap = yapoints[y];
137
19.5k
                if (yap > 0) {
138
0
                    const __m128i vyap = _mm_set1_epi32(yap);
139
0
                    const __m128i vinvyap = _mm_sub_epi32(v256, vyap);
140
0
                    __m128i vr = qt_qimageScaleAARGBA_helper(sptr + sow, xap, Cx, 1, vxap, vCx);
141
142
0
                    vx = _mm_mullo_epi32(vx, vinvyap);
143
0
                    vr = _mm_mullo_epi32(vr, vyap);
144
0
                    vx = _mm_add_epi32(vx, vr);
145
0
                    vx = _mm_srli_epi32(vx, 8);
146
0
                }
147
19.5k
                vx = _mm_srli_epi32(vx, 14);
148
19.5k
                vx = _mm_packus_epi32(vx, vx);
149
19.5k
                vx = _mm_packus_epi16(vx, vx);
150
19.5k
                *dptr = _mm_cvtsi128_si32(vx);
151
19.5k
                if (RGB)
152
19.5k
                    *dptr |= 0xff000000;
153
19.5k
                dptr++;
154
19.5k
            }
155
514
        }
156
6
    };
Unexecuted instantiation: qt_qimageScaleAARGBA_down_x_up_y_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}::operator()(int, int) const
qt_qimageScaleAARGBA_down_x_up_y_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}::operator()(int, int) const
Line
Count
Source
124
6
    auto scaleSection = [&] (int yStart, int yEnd) {
125
520
        for (int y = yStart; y < yEnd; ++y) {
126
514
            unsigned int *dptr = dest + (y * dow);
127
20.0k
            for (int x = 0; x < dw; x++) {
128
19.5k
                int Cx = xapoints[x] >> 16;
129
19.5k
                int xap = xapoints[x] & 0xffff;
130
19.5k
                const __m128i vCx = _mm_set1_epi32(Cx);
131
19.5k
                const __m128i vxap = _mm_set1_epi32(xap);
132
133
19.5k
                const unsigned int *sptr = ypoints[y] + xpoints[x];
134
19.5k
                __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
135
136
19.5k
                int yap = yapoints[y];
137
19.5k
                if (yap > 0) {
138
0
                    const __m128i vyap = _mm_set1_epi32(yap);
139
0
                    const __m128i vinvyap = _mm_sub_epi32(v256, vyap);
140
0
                    __m128i vr = qt_qimageScaleAARGBA_helper(sptr + sow, xap, Cx, 1, vxap, vCx);
141
142
0
                    vx = _mm_mullo_epi32(vx, vinvyap);
143
0
                    vr = _mm_mullo_epi32(vr, vyap);
144
0
                    vx = _mm_add_epi32(vx, vr);
145
0
                    vx = _mm_srli_epi32(vx, 8);
146
0
                }
147
19.5k
                vx = _mm_srli_epi32(vx, 14);
148
19.5k
                vx = _mm_packus_epi32(vx, vx);
149
19.5k
                vx = _mm_packus_epi16(vx, vx);
150
19.5k
                *dptr = _mm_cvtsi128_si32(vx);
151
19.5k
                if (RGB)
152
19.5k
                    *dptr |= 0xff000000;
153
19.5k
                dptr++;
154
19.5k
            }
155
514
        }
156
6
    };
157
6
    multithread_pixels_function(isi, dh, scaleSection);
158
6
}
Unexecuted instantiation: void qt_qimageScaleAARGBA_down_x_up_y_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)
void qt_qimageScaleAARGBA_down_x_up_y_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)
Line
Count
Source
115
6
{
116
6
    const unsigned int **ypoints = isi->ypoints;
117
6
    int *xpoints = isi->xpoints;
118
6
    int *xapoints = isi->xapoints;
119
6
    int *yapoints = isi->yapoints;
120
121
6
    const __m128i v256 = _mm_set1_epi32(256);
122
123
    /* go through every scanline in the output buffer */
124
6
    auto scaleSection = [&] (int yStart, int yEnd) {
125
6
        for (int y = yStart; y < yEnd; ++y) {
126
6
            unsigned int *dptr = dest + (y * dow);
127
6
            for (int x = 0; x < dw; x++) {
128
6
                int Cx = xapoints[x] >> 16;
129
6
                int xap = xapoints[x] & 0xffff;
130
6
                const __m128i vCx = _mm_set1_epi32(Cx);
131
6
                const __m128i vxap = _mm_set1_epi32(xap);
132
133
6
                const unsigned int *sptr = ypoints[y] + xpoints[x];
134
6
                __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
135
136
6
                int yap = yapoints[y];
137
6
                if (yap > 0) {
138
6
                    const __m128i vyap = _mm_set1_epi32(yap);
139
6
                    const __m128i vinvyap = _mm_sub_epi32(v256, vyap);
140
6
                    __m128i vr = qt_qimageScaleAARGBA_helper(sptr + sow, xap, Cx, 1, vxap, vCx);
141
142
6
                    vx = _mm_mullo_epi32(vx, vinvyap);
143
6
                    vr = _mm_mullo_epi32(vr, vyap);
144
6
                    vx = _mm_add_epi32(vx, vr);
145
6
                    vx = _mm_srli_epi32(vx, 8);
146
6
                }
147
6
                vx = _mm_srli_epi32(vx, 14);
148
6
                vx = _mm_packus_epi32(vx, vx);
149
6
                vx = _mm_packus_epi16(vx, vx);
150
6
                *dptr = _mm_cvtsi128_si32(vx);
151
6
                if (RGB)
152
6
                    *dptr |= 0xff000000;
153
6
                dptr++;
154
6
            }
155
6
        }
156
6
    };
157
6
    multithread_pixels_function(isi, dh, scaleSection);
158
6
}
159
160
template<bool RGB>
161
void qt_qimageScaleAARGBA_down_xy_sse4(QImageScaleInfo *isi, unsigned int *dest,
162
                                       int dw, int dh, int dow, int sow)
163
1.40k
{
164
1.40k
    const unsigned int **ypoints = isi->ypoints;
165
1.40k
    int *xpoints = isi->xpoints;
166
1.40k
    int *xapoints = isi->xapoints;
167
1.40k
    int *yapoints = isi->yapoints;
168
169
11.2k
    auto scaleSection = [&] (int yStart, int yEnd) {
170
102k
        for (int y = yStart; y < yEnd; ++y) {
171
90.9k
            int Cy = yapoints[y] >> 16;
172
90.9k
            int yap = yapoints[y] & 0xffff;
173
90.9k
            const __m128i vCy = _mm_set1_epi32(Cy);
174
90.9k
            const __m128i vyap = _mm_set1_epi32(yap);
175
176
90.9k
            unsigned int *dptr = dest + (y * dow);
177
5.24M
            for (int x = 0; x < dw; x++) {
178
5.15M
                const int Cx = xapoints[x] >> 16;
179
5.15M
                const int xap = xapoints[x] & 0xffff;
180
5.15M
                const __m128i vCx = _mm_set1_epi32(Cx);
181
5.15M
                const __m128i vxap = _mm_set1_epi32(xap);
182
183
5.15M
                const unsigned int *sptr = ypoints[y] + xpoints[x];
184
5.15M
                __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
185
5.15M
                __m128i vr = _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vyap);
186
187
5.15M
                int j;
188
15.7M
                for (j = (1 << 14) - yap; j > Cy; j -= Cy) {
189
10.6M
                    sptr += sow;
190
10.6M
                    vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
191
10.6M
                    vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vCy));
192
10.6M
                }
193
5.15M
                sptr += sow;
194
5.15M
                vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
195
5.15M
                vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), _mm_set1_epi32(j)));
196
197
5.15M
                vr = _mm_srli_epi32(vr, 24);
198
5.15M
                vr = _mm_packus_epi32(vr, _mm_setzero_si128());
199
5.15M
                vr = _mm_packus_epi16(vr, _mm_setzero_si128());
200
5.15M
                *dptr = _mm_cvtsi128_si32(vr);
201
5.15M
                if (RGB)
202
5.13M
                    *dptr |= 0xff000000;
203
5.15M
                dptr++;
204
5.15M
            }
205
90.9k
        }
206
11.2k
    };
Unexecuted instantiation: qt_qimageScaleAARGBA_down_xy_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}::operator()(int, int) const
qt_qimageScaleAARGBA_down_xy_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)::{lambda(int, int)#1}::operator()(int, int) const
Line
Count
Source
169
11.2k
    auto scaleSection = [&] (int yStart, int yEnd) {
170
102k
        for (int y = yStart; y < yEnd; ++y) {
171
90.9k
            int Cy = yapoints[y] >> 16;
172
90.9k
            int yap = yapoints[y] & 0xffff;
173
90.9k
            const __m128i vCy = _mm_set1_epi32(Cy);
174
90.9k
            const __m128i vyap = _mm_set1_epi32(yap);
175
176
90.9k
            unsigned int *dptr = dest + (y * dow);
177
5.24M
            for (int x = 0; x < dw; x++) {
178
5.15M
                const int Cx = xapoints[x] >> 16;
179
5.15M
                const int xap = xapoints[x] & 0xffff;
180
5.15M
                const __m128i vCx = _mm_set1_epi32(Cx);
181
5.15M
                const __m128i vxap = _mm_set1_epi32(xap);
182
183
5.15M
                const unsigned int *sptr = ypoints[y] + xpoints[x];
184
5.15M
                __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
185
5.15M
                __m128i vr = _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vyap);
186
187
5.15M
                int j;
188
15.7M
                for (j = (1 << 14) - yap; j > Cy; j -= Cy) {
189
10.6M
                    sptr += sow;
190
10.6M
                    vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
191
10.6M
                    vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vCy));
192
10.6M
                }
193
5.15M
                sptr += sow;
194
5.15M
                vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
195
5.15M
                vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), _mm_set1_epi32(j)));
196
197
5.15M
                vr = _mm_srli_epi32(vr, 24);
198
5.15M
                vr = _mm_packus_epi32(vr, _mm_setzero_si128());
199
5.15M
                vr = _mm_packus_epi16(vr, _mm_setzero_si128());
200
5.15M
                *dptr = _mm_cvtsi128_si32(vr);
201
5.15M
                if (RGB)
202
5.13M
                    *dptr |= 0xff000000;
203
5.15M
                dptr++;
204
5.15M
            }
205
90.9k
        }
206
11.2k
    };
207
1.40k
    multithread_pixels_function(isi, dh, scaleSection);
208
1.40k
}
Unexecuted instantiation: void qt_qimageScaleAARGBA_down_xy_sse4<false>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)
void qt_qimageScaleAARGBA_down_xy_sse4<true>(QImageScale::QImageScaleInfo*, unsigned int*, int, int, int, int)
Line
Count
Source
163
1.40k
{
164
1.40k
    const unsigned int **ypoints = isi->ypoints;
165
1.40k
    int *xpoints = isi->xpoints;
166
1.40k
    int *xapoints = isi->xapoints;
167
1.40k
    int *yapoints = isi->yapoints;
168
169
1.40k
    auto scaleSection = [&] (int yStart, int yEnd) {
170
1.40k
        for (int y = yStart; y < yEnd; ++y) {
171
1.40k
            int Cy = yapoints[y] >> 16;
172
1.40k
            int yap = yapoints[y] & 0xffff;
173
1.40k
            const __m128i vCy = _mm_set1_epi32(Cy);
174
1.40k
            const __m128i vyap = _mm_set1_epi32(yap);
175
176
1.40k
            unsigned int *dptr = dest + (y * dow);
177
1.40k
            for (int x = 0; x < dw; x++) {
178
1.40k
                const int Cx = xapoints[x] >> 16;
179
1.40k
                const int xap = xapoints[x] & 0xffff;
180
1.40k
                const __m128i vCx = _mm_set1_epi32(Cx);
181
1.40k
                const __m128i vxap = _mm_set1_epi32(xap);
182
183
1.40k
                const unsigned int *sptr = ypoints[y] + xpoints[x];
184
1.40k
                __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
185
1.40k
                __m128i vr = _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vyap);
186
187
1.40k
                int j;
188
1.40k
                for (j = (1 << 14) - yap; j > Cy; j -= Cy) {
189
1.40k
                    sptr += sow;
190
1.40k
                    vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
191
1.40k
                    vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vCy));
192
1.40k
                }
193
1.40k
                sptr += sow;
194
1.40k
                vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
195
1.40k
                vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), _mm_set1_epi32(j)));
196
197
1.40k
                vr = _mm_srli_epi32(vr, 24);
198
1.40k
                vr = _mm_packus_epi32(vr, _mm_setzero_si128());
199
1.40k
                vr = _mm_packus_epi16(vr, _mm_setzero_si128());
200
1.40k
                *dptr = _mm_cvtsi128_si32(vr);
201
1.40k
                if (RGB)
202
1.40k
                    *dptr |= 0xff000000;
203
1.40k
                dptr++;
204
1.40k
            }
205
1.40k
        }
206
1.40k
    };
207
1.40k
    multithread_pixels_function(isi, dh, scaleSection);
208
1.40k
}
209
210
template void qt_qimageScaleAARGBA_up_x_down_y_sse4<false>(QImageScaleInfo *isi, unsigned int *dest,
211
                                                           int dw, int dh, int dow, int sow);
212
213
template void qt_qimageScaleAARGBA_up_x_down_y_sse4<true>(QImageScaleInfo *isi, unsigned int *dest,
214
                                                          int dw, int dh, int dow, int sow);
215
216
template void qt_qimageScaleAARGBA_down_x_up_y_sse4<false>(QImageScaleInfo *isi, unsigned int *dest,
217
                                                           int dw, int dh, int dow, int sow);
218
219
template void qt_qimageScaleAARGBA_down_x_up_y_sse4<true>(QImageScaleInfo *isi, unsigned int *dest,
220
                                                          int dw, int dh, int dow, int sow);
221
222
template void qt_qimageScaleAARGBA_down_xy_sse4<false>(QImageScaleInfo *isi, unsigned int *dest,
223
                                                       int dw, int dh, int dow, int sow);
224
225
template void qt_qimageScaleAARGBA_down_xy_sse4<true>(QImageScaleInfo *isi, unsigned int *dest,
226
                                                      int dw, int dh, int dow, int sow);
227
228
QT_END_NAMESPACE
229
230
#endif