/src/qtbase/src/gui/image/qimage_ssse3.cpp
Line | Count | Source |
1 | | // Copyright (C) 2016 The Qt Company Ltd. |
2 | | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
3 | | |
4 | | #include <qimage.h> |
5 | | #include <private/qimage_p.h> |
6 | | #include <private/qsimd_p.h> |
7 | | |
8 | | #ifdef QT_COMPILER_SUPPORTS_SSSE3 |
9 | | |
10 | | QT_BEGIN_NAMESPACE |
11 | | |
12 | | // Convert a scanline of RGB888 (src) to RGB32 (dst) |
13 | | // src must be at least len * 3 bytes |
14 | | // dst must be at least len * 4 bytes |
15 | | Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgb32_ssse3(quint32 *dst, const uchar *src, int len) |
16 | 0 | { |
17 | 0 | int i = 0; |
18 | | |
19 | | // Prologue, align dst to 16 bytes. |
20 | 0 | ALIGNMENT_PROLOGUE_16BYTES(dst, i, len) { |
21 | 0 | dst[i] = qRgb(src[0], src[1], src[2]); |
22 | 0 | src += 3; |
23 | 0 | } |
24 | | |
25 | | // Mask the 4 first colors of the RGB888 vector |
26 | 0 | const __m128i shuffleMask = _mm_set_epi8(char(0xff), 9, 10, 11, char(0xff), 6, 7, 8, char(0xff), 3, 4, 5, char(0xff), 0, 1, 2); |
27 | | |
28 | | // Mask the 4 last colors of a RGB888 vector with an offset of 1 (so the last 3 bytes are RGB) |
29 | 0 | const __m128i shuffleMaskEnd = _mm_set_epi8(char(0xff), 13, 14, 15, char(0xff), 10, 11, 12, char(0xff), 7, 8, 9, char(0xff), 4, 5, 6); |
30 | | |
31 | | // Mask to have alpha = 0xff |
32 | 0 | const __m128i alphaMask = _mm_set1_epi32(0xff000000); |
33 | |
|
34 | 0 | const __m128i *inVectorPtr = (const __m128i *)src; |
35 | 0 | __m128i *dstVectorPtr = (__m128i *)(dst + i); |
36 | |
|
37 | 0 | for (; i < (len - 15); i += 16) { // one iteration in the loop converts 16 pixels |
38 | | /* |
39 | | RGB888 has 5 pixels per vector, + 1 byte from the next pixel. The idea here is |
40 | | to load vectors of RGB888 and use palignr to select a vector out of two vectors. |
41 | | |
42 | | After 3 loads of RGB888 and 3 stores of RGB32, we have 4 pixels left in the last |
43 | | vector of RGB888, we can mask it directly to get a last store or RGB32. After that, |
44 | | the first next byte is a R, and we can loop for the next 16 pixels. |
45 | | |
46 | | The conversion itself is done with a byte permutation (pshufb). |
47 | | */ |
48 | 0 | __m128i firstSrcVector = _mm_lddqu_si128(inVectorPtr); |
49 | 0 | __m128i outputVector = _mm_shuffle_epi8(firstSrcVector, shuffleMask); |
50 | 0 | _mm_store_si128(dstVectorPtr, _mm_or_si128(outputVector, alphaMask)); |
51 | 0 | ++inVectorPtr; |
52 | 0 | ++dstVectorPtr; |
53 | | |
54 | | // There are 4 unused bytes left in srcVector, we need to load the next 16 bytes |
55 | | // and load the next input with palignr |
56 | 0 | __m128i secondSrcVector = _mm_lddqu_si128(inVectorPtr); |
57 | 0 | __m128i srcVector = _mm_alignr_epi8(secondSrcVector, firstSrcVector, 12); |
58 | 0 | outputVector = _mm_shuffle_epi8(srcVector, shuffleMask); |
59 | 0 | _mm_store_si128(dstVectorPtr, _mm_or_si128(outputVector, alphaMask)); |
60 | 0 | ++inVectorPtr; |
61 | 0 | ++dstVectorPtr; |
62 | 0 | firstSrcVector = secondSrcVector; |
63 | | |
64 | | // We now have 8 unused bytes left in firstSrcVector |
65 | 0 | secondSrcVector = _mm_lddqu_si128(inVectorPtr); |
66 | 0 | srcVector = _mm_alignr_epi8(secondSrcVector, firstSrcVector, 8); |
67 | 0 | outputVector = _mm_shuffle_epi8(srcVector, shuffleMask); |
68 | 0 | _mm_store_si128(dstVectorPtr, _mm_or_si128(outputVector, alphaMask)); |
69 | 0 | ++inVectorPtr; |
70 | 0 | ++dstVectorPtr; |
71 | | |
72 | | // There are now 12 unused bytes in firstSrcVector. |
73 | | // We can mask them directly, almost there. |
74 | 0 | outputVector = _mm_shuffle_epi8(secondSrcVector, shuffleMaskEnd); |
75 | 0 | _mm_store_si128(dstVectorPtr, _mm_or_si128(outputVector, alphaMask)); |
76 | 0 | ++dstVectorPtr; |
77 | 0 | } |
78 | 0 | src = (const uchar *)inVectorPtr; |
79 | |
|
80 | 0 | SIMD_EPILOGUE(i, len, 15) { |
81 | 0 | dst[i] = qRgb(src[0], src[1], src[2]); |
82 | 0 | src += 3; |
83 | 0 | } |
84 | 0 | } |
85 | | |
86 | | void convert_RGB888_to_RGB32_ssse3(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags) |
87 | 0 | { |
88 | 0 | Q_ASSERT(src->format == QImage::Format_RGB888 || src->format == QImage::Format_BGR888); |
89 | 0 | if (src->format == QImage::Format_BGR888) |
90 | 0 | Q_ASSERT(dest->format == QImage::Format_RGBX8888 || dest->format == QImage::Format_RGBA8888 || dest->format == QImage::Format_RGBA8888_Premultiplied); |
91 | 0 | else |
92 | 0 | Q_ASSERT(dest->format == QImage::Format_RGB32 || dest->format == QImage::Format_ARGB32 || dest->format == QImage::Format_ARGB32_Premultiplied); |
93 | 0 | Q_ASSERT(src->width == dest->width); |
94 | 0 | Q_ASSERT(src->height == dest->height); |
95 | |
|
96 | 0 | const uchar *src_data = (uchar *) src->data; |
97 | 0 | quint32 *dest_data = (quint32 *) dest->data; |
98 | |
|
99 | 0 | for (int i = 0; i < src->height; ++i) { |
100 | 0 | qt_convert_rgb888_to_rgb32_ssse3(dest_data, src_data, src->width); |
101 | 0 | src_data += src->bytes_per_line; |
102 | 0 | dest_data = (quint32 *)((uchar*)dest_data + dest->bytes_per_line); |
103 | 0 | } |
104 | 0 | } |
105 | | |
106 | | QT_END_NAMESPACE |
107 | | |
108 | | #endif // QT_COMPILER_SUPPORTS_SSSE3 |