Coverage Report

Created: 2025-06-22 07:10

/src/libwebp/src/dsp/lossless_sse41.c
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2021 Google Inc. All Rights Reserved.
2
//
3
// Use of this source code is governed by a BSD-style license
4
// that can be found in the COPYING file in the root of the source
5
// tree. An additional intellectual property rights grant can be found
6
// in the file PATENTS. All contributing project authors may
7
// be found in the AUTHORS file in the root of the source tree.
8
// -----------------------------------------------------------------------------
9
//
10
// SSE41 variant of methods for lossless decoder
11
12
#include "src/dsp/dsp.h"
13
14
#if defined(WEBP_USE_SSE41)
15
#include <emmintrin.h>
16
#include <smmintrin.h>
17
18
#include "src/webp/types.h"
19
#include "src/dsp/cpu.h"
20
#include "src/dsp/lossless.h"
21
22
//------------------------------------------------------------------------------
23
// Color-space conversion functions
24
25
static void TransformColorInverse_SSE41(const VP8LMultipliers* const m,  // reads m->green_to_red, m->green_to_blue, m->red_to_blue
26
                                        const uint32_t* const src,
27
12.3M
                                        int num_pixels, uint32_t* dst) {  // stores num_pixels results to dst
28
// sign-extended multiplying constants, pre-shifted by 5 (consumed below by _mm_mulhi_epi16, which keeps the high 16 bits of each product).
29
37.1M
#define CST(X)  (((int16_t)(m->X << 8)) >> 5)   // sign-extend: m->X is moved to the high byte of an int16_t, then shifted right by 5
30
12.3M
  const __m128i mults_rb =  // each 32-bit lane: CST(green_to_red) in bits 16..31, CST(green_to_blue) in bits 0..15
31
12.3M
      _mm_set1_epi32((int)((uint32_t)CST(green_to_red) << 16 |
32
12.3M
                           (CST(green_to_blue) & 0xffff)));
33
12.3M
  const __m128i mults_b2 = _mm_set1_epi32(CST(red_to_blue));  // each 32-bit lane: CST(red_to_blue)
34
12.3M
#undef CST
35
12.3M
  const __m128i mask_ag = _mm_set1_epi32((int)0xff00ff00);  // high bit set in bytes 1 and 3 of every pixel; selector for _mm_blendv_epi8 below
36
12.3M
  const __m128i perm1 = _mm_setr_epi8(-1, 1, -1, 1, -1, 5, -1, 5,  // byte 1 of each pixel -> high byte of both 16-bit halves; -1 yields a zero byte
37
12.3M
                                      -1, 9, -1, 9, -1, 13, -1, 13);
38
12.3M
  const __m128i perm2 = _mm_setr_epi8(-1, 2, -1, -1, -1, 6, -1, -1,  // byte 2 of each pixel -> high byte of the low 16-bit half; all other bytes zeroed
39
12.3M
                                      -1, 10, -1, -1, -1, 14, -1, -1);
40
12.3M
  int i;
41
37.7M
  for (i = 0; i + 4 <= num_pixels; i += 4) {  // 4 pixels (one 128-bit register) per iteration
42
25.4M
    const __m128i A = _mm_loadu_si128((const __m128i*)(src + i));  // unaligned load of src[i..i+3]
43
25.4M
    const __m128i B = _mm_shuffle_epi8(A, perm1); // argb -> g0g0
44
25.4M
    const __m128i C = _mm_mulhi_epi16(B, mults_rb);  // high 16 bits of (byte 1 << 8) * the green_to_red / green_to_blue constants
45
25.4M
    const __m128i D = _mm_add_epi8(A, C);  // byte-wise wrapping add of those products onto the input
46
25.4M
    const __m128i E = _mm_shuffle_epi8(D, perm2);  // isolate byte 2 of D, i.e. after the add above
47
25.4M
    const __m128i F = _mm_mulhi_epi16(E, mults_b2);  // high 16 bits of (byte 2 << 8) * the red_to_blue constant
48
25.4M
    const __m128i G = _mm_add_epi8(D, F);  // byte-wise add; F is zero in bytes 2..3 of each pixel because perm2 zeroed that half of E
49
25.4M
    const __m128i out = _mm_blendv_epi8(G, A, mask_ag);  // bytes 1 and 3 come from the untouched input A, bytes 0 and 2 from G
50
25.4M
    _mm_storeu_si128((__m128i*)&dst[i], out);  // unaligned store to dst[i..i+3]
51
25.4M
  }
52
  // Fall-back to C-version for left-overs (the trailing num_pixels - i pixels the loop above did not reach).
53
12.3M
  if (i != num_pixels) {
54
64.5k
    VP8LTransformColorInverse_C(m, src + i, num_pixels - i, dst + i);
55
64.5k
  }
56
12.3M
}
57
58
//------------------------------------------------------------------------------
59
60
0
#define ARGB_TO_RGB_SSE41 do {  /* uses in, out, num_pixels and perm0..perm3 from the expansion site */ \
61
0
  while (num_pixels >= 16) {  /* one pass = four 128-bit loads, three 128-bit stores */ \
62
0
    const __m128i in0 = _mm_loadu_si128(in + 0);      \
63
0
    const __m128i in1 = _mm_loadu_si128(in + 1);      \
64
0
    const __m128i in2 = _mm_loadu_si128(in + 2);      \
65
0
    const __m128i in3 = _mm_loadu_si128(in + 3);      \
66
0
    const __m128i a0 = _mm_shuffle_epi8(in0, perm0);  /* byte-shuffle each input with its own permutation */ \
67
0
    const __m128i a1 = _mm_shuffle_epi8(in1, perm1);  \
68
0
    const __m128i a2 = _mm_shuffle_epi8(in2, perm2);  \
69
0
    const __m128i a3 = _mm_shuffle_epi8(in3, perm3);  \
70
0
    const __m128i b0 = _mm_blend_epi16(a0, a1, 0xc0); /* 16-bit lanes 6..7 from a1, lanes 0..5 from a0 */ \
71
0
    const __m128i b1 = _mm_blend_epi16(a1, a2, 0xf0); /* 16-bit lanes 4..7 from a2, lanes 0..3 from a1 */ \
72
0
    const __m128i b2 = _mm_blend_epi16(a2, a3, 0xfc); /* 16-bit lanes 2..7 from a3, lanes 0..1 from a2 */ \
73
0
    _mm_storeu_si128(out + 0, b0);                    \
74
0
    _mm_storeu_si128(out + 1, b1);                    \
75
0
    _mm_storeu_si128(out + 2, b2);                    \
76
0
    in += 4;                                          /* 64 input bytes consumed */ \
77
0
    out += 3;                                         /* 48 output bytes produced */ \
78
0
    num_pixels -= 16;                                 \
79
0
  }                                                   \
80
0
} while (0)
81
82
static void ConvertBGRAToRGB_SSE41(const uint32_t* WEBP_RESTRICT src,  // 32-bit pixels in; the vector path below writes 3 bytes per pixel to dst
83
0
                                   int num_pixels, uint8_t* WEBP_RESTRICT dst) {
84
0
  const __m128i* in = (const __m128i*)src;  // cursors advanced by ARGB_TO_RGB_SSE41
85
0
  __m128i* out = (__m128i*)dst;
86
0
  const __m128i perm0 = _mm_setr_epi8(2, 1, 0, 6, 5, 4, 10, 9,  // per pixel: take bytes 2, 1, 0 and skip byte 3; the four -1 selectors yield zero bytes
87
0
                                      8, 14, 13, 12, -1, -1, -1, -1);
88
0
  const __m128i perm1 = _mm_shuffle_epi32(perm0, 0x39);  // perm0 with its 32-bit lanes reordered to 1, 2, 3, 0
89
0
  const __m128i perm2 = _mm_shuffle_epi32(perm0, 0x4e);  // perm0 with its 32-bit lanes reordered to 2, 3, 0, 1
90
0
  const __m128i perm3 = _mm_shuffle_epi32(perm0, 0x93);  // perm0 with its 32-bit lanes reordered to 3, 0, 1, 2
91
92
0
  ARGB_TO_RGB_SSE41;  // converts blocks of 16 pixels, advancing in/out and decrementing num_pixels
93
94
  // left-overs: fewer than 16 pixels remain; in/out already point past the converted data
95
0
  if (num_pixels > 0) {
96
0
    VP8LConvertBGRAToRGB_C((const uint32_t*)in, num_pixels, (uint8_t*)out);
97
0
  }
98
0
}
99
100
static void ConvertBGRAToBGR_SSE41(const uint32_t* WEBP_RESTRICT src,  // 32-bit pixels in; the vector path below writes 3 bytes per pixel to dst
101
0
                                   int num_pixels, uint8_t* WEBP_RESTRICT dst) {
102
0
  const __m128i* in = (const __m128i*)src;  // cursors advanced by ARGB_TO_RGB_SSE41
103
0
  __m128i* out = (__m128i*)dst;
104
0
  const __m128i perm0 = _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 10,  // per pixel: keep bytes 0, 1, 2 in order and skip byte 3; the four -1 selectors yield zero bytes
105
0
                                      12, 13, 14, -1, -1, -1, -1);
106
0
  const __m128i perm1 = _mm_shuffle_epi32(perm0, 0x39);  // perm0 with its 32-bit lanes reordered to 1, 2, 3, 0
107
0
  const __m128i perm2 = _mm_shuffle_epi32(perm0, 0x4e);  // perm0 with its 32-bit lanes reordered to 2, 3, 0, 1
108
0
  const __m128i perm3 = _mm_shuffle_epi32(perm0, 0x93);  // perm0 with its 32-bit lanes reordered to 3, 0, 1, 2
109
110
0
  ARGB_TO_RGB_SSE41;  // converts blocks of 16 pixels, advancing in/out and decrementing num_pixels
111
112
  // left-overs: fewer than 16 pixels remain; in/out already point past the converted data
113
0
  if (num_pixels > 0) {
114
0
    VP8LConvertBGRAToBGR_C((const uint32_t*)in, num_pixels, (uint8_t*)out);
115
0
  }
116
0
}
117
118
#undef ARGB_TO_RGB_SSE41
119
120
//------------------------------------------------------------------------------
121
// Entry point
122
123
extern void VP8LDspInitSSE41(void);  // prototype for the definition that follows
124
125
1
WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitSSE41(void) {  // assigns the SSE4.1 routines from this file to the VP8L* dispatch variables
126
1
  VP8LTransformColorInverse = TransformColorInverse_SSE41;
127
1
  VP8LConvertBGRAToRGB = ConvertBGRAToRGB_SSE41;
128
1
  VP8LConvertBGRAToBGR = ConvertBGRAToBGR_SSE41;
129
130
  // SSE exports for AVX and above: the same SSE4.1 routines are also assigned to the *_SSE variables.
131
1
  VP8LTransformColorInverse_SSE = TransformColorInverse_SSE41;
132
1
  VP8LConvertBGRAToRGB_SSE = ConvertBGRAToRGB_SSE41;
133
1
}
134
135
#else  // !WEBP_USE_SSE41
136
137
WEBP_DSP_INIT_STUB(VP8LDspInitSSE41)  // NOTE(review): presumably defines an empty VP8LDspInitSSE41 when WEBP_USE_SSE41 is not set - confirm against the macro's definition
138
139
#endif  // WEBP_USE_SSE41