Coverage Report

Created: 2025-07-01 06:46

/src/FreeRDP/libfreerdp/primitives/sse/prim_set_sse2.c
Line
Count
Source (jump to first uncovered line)
1
/* FreeRDP: A Remote Desktop Protocol Client
2
 * Optimized routines to set a chunk of memory to a constant.
3
 * vi:ts=4 sw=4:
4
 *
5
 * (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
6
 * Licensed under the Apache License, Version 2.0 (the "License"); you may
7
 * not use this file except in compliance with the License. You may obtain
8
 * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
9
 * Unless required by applicable law or agreed to in writing, software
10
 * distributed under the License is distributed on an "AS IS" BASIS,
11
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
12
 * or implied. See the License for the specific language governing
13
 * permissions and limitations under the License.
14
 *
15
 */
16
17
#include <freerdp/config.h>
18
19
#include <string.h>
20
#include <freerdp/types.h>
21
#include <freerdp/primitives.h>
22
#include <winpr/sysinfo.h>
23
24
#include "prim_internal.h"
25
#include "prim_avxsse.h"
26
#include "prim_set.h"
27
28
/* ========================================================================= */
29
#if defined(SSE_AVX_INTRINSICS_ENABLED)
30
#include <emmintrin.h>
31
32
/* Generic primitives table. It is assigned in primitives_init_set_sse2_int()
 * and read by sse2_set_8u() as the fallback for buffers shorter than 16 bytes. */
static primitives_t* generic = NULL;
33
34
/**
 * Fill a byte buffer with a constant value using 16-byte SSE2 stores.
 *
 * Buffers shorter than 16 bytes are delegated to the generic set_8u routine.
 * Otherwise the leading bytes are written one at a time until the write
 * pointer is 16-byte aligned, the bulk is written 16 bytes per store (first in
 * groups of sixteen stores, then store by store), and the remaining tail is
 * written one byte at a time.
 *
 * @param val   byte value to store
 * @param pDst  destination buffer
 * @param ulen  number of bytes to write
 *
 * @return PRIMITIVES_SUCCESS, or the generic routine's result for short buffers
 */
static pstatus_t sse2_set_8u(BYTE val, BYTE* WINPR_RESTRICT pDst, UINT32 ulen)
{
  size_t remaining = ulen;
  BYTE* out = pDst;
  __m128i fill;

  if (remaining < 16)
    return generic->set_8u(val, pDst, ulen);

  /* Single-byte stores until the write pointer sits on a 16-byte boundary. */
  for (; ((ULONG_PTR)out & 0x0f) != 0; out++)
  {
    *out = val;

    if (--remaining == 0)
      return PRIMITIVES_SUCCESS;
  }

  fill = mm_set1_epu8(val);

  /* 256-byte groups: sixteen 16-byte stores per group. */
  for (size_t groups = remaining >> 8; groups > 0; groups--)
  {
    for (int i = 0; i < 16; i++)
    {
      STORE_SI128(out, fill);
      out += 16;
    }
  }
  remaining &= 0xff;

  /* Whole 16-byte chunks that did not fill a 256-byte group. */
  for (size_t chunks = remaining >> 4; chunks > 0; chunks--)
  {
    STORE_SI128(out, fill);
    out += 16;
  }
  remaining &= 0x0f;

  /* Tail bytes that do not fill a whole chunk. */
  for (; remaining > 0; remaining--)
    *out++ = val;

  return PRIMITIVES_SUCCESS;
}
116
117
/* ------------------------------------------------------------------------- */
118
/**
 * Fill a buffer of 32-bit values with a constant using 16-byte SSE2 stores.
 *
 * Fewer than 32 elements are written with a plain loop. A destination that is
 * not 4-byte aligned cannot reach 16-byte alignment by stepping whole
 * elements, so that case is handed to the generic set_32u routine. Otherwise
 * leading elements are written singly until the write pointer is 16-byte
 * aligned, the bulk is written four elements per store, and leftover elements
 * are written singly.
 *
 * @param val   value to store
 * @param pDst  destination buffer
 * @param ulen  number of 32-bit elements (not bytes) to write
 *
 * @return PRIMITIVES_SUCCESS, or the generic routine's result on the
 *         misaligned fallback path
 */
static pstatus_t sse2_set_32u(UINT32 val, UINT32* WINPR_RESTRICT pDst, UINT32 ulen)
{
  size_t len = ulen; /* counted in 32-bit elements throughout */
  UINT32* dptr = pDst;
  __m128i xmm0;
  size_t count = 0;

  /* If really short, just do it here. */
  if (len < 32)
  {
    while (len--)
      *dptr++ = val;

    return PRIMITIVES_SUCCESS;
  }

  /* Assure we can reach 16-byte alignment. */
  if (((ULONG_PTR)dptr & 0x03) != 0)
  {
    /* The generic table is only needed on this path, so look it up here
     * instead of on every call. */
    const primitives_t* prim = primitives_get_generic();

    return prim->set_32u(val, pDst, ulen);
  }

  /* Seek 16-byte alignment, one element at a time. */
  while ((ULONG_PTR)dptr & 0x0f)
  {
    *dptr++ = val;

    if (--len == 0)
      return PRIMITIVES_SUCCESS;
  }

  xmm0 = mm_set1_epu32(val);

  /* 256-byte chunks: 64 elements each, written as sixteen 16-byte stores. */
  count = len >> 6;
  len -= count << 6;

  while (count--)
  {
    for (int i = 0; i < 16; i++)
    {
      STORE_SI128(dptr, xmm0);
      dptr += 4;
    }
  }

  /* 16-byte chunks: 4 elements each, one store per chunk. */
  count = len >> 2;
  len -= count << 2;

  while (count--)
  {
    STORE_SI128(dptr, xmm0);
    dptr += 4;
  }

  /* Leftover elements (at most three). */
  while (len--)
    *dptr++ = val;

  return PRIMITIVES_SUCCESS;
}
209
210
/* ------------------------------------------------------------------------- */
211
/**
 * Signed counterpart of sse2_set_32u().
 *
 * The bit pattern of @p val is passed on unchanged as an unsigned value, and
 * the destination is treated as a UINT32 buffer.
 *
 * @param val   value to store
 * @param pDst  destination buffer
 * @param len   length forwarded unchanged to sse2_set_32u()
 *
 * @return whatever sse2_set_32u() returns
 */
static pstatus_t sse2_set_32s(INT32 val, INT32* WINPR_RESTRICT pDst, UINT32 len)
{
  UINT32 bits = 0;

  /* Copy the representation rather than converting the value. */
  memcpy(&bits, &val, sizeof(bits));
  return sse2_set_32u(bits, (UINT32*)pDst, len);
}
216
#endif
217
218
/* ------------------------------------------------------------------------- */
219
/**
 * Install the SSE2 "set" primitives into @p prims.
 *
 * When SSE_AVX_INTRINSICS_ENABLED is defined, the generic table is stored in
 * the file-level `generic` pointer and the set_8u, set_32s and set_32u entries
 * of @p prims are replaced by the SSE2 versions. Otherwise only a verbose log
 * line is emitted and @p prims is left untouched.
 *
 * @param prims  primitives table to update
 */
void primitives_init_set_sse2_int(primitives_t* WINPR_RESTRICT prims)
{
#if !defined(SSE_AVX_INTRINSICS_ENABLED)
  WLog_VRB(PRIM_TAG, "undefined WITH_SIMD or SSE2 intrinsics not available");
  WINPR_UNUSED(prims);
#else
  /* sse2_set_8u() reads this pointer for short buffers. */
  generic = primitives_get_generic();

  WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations");

  /* Replace the set entries with the tuned versions. */
  prims->set_8u = sse2_set_8u;
  prims->set_32u = sse2_set_32u;
  prims->set_32s = sse2_set_32s;
#endif
}