Coverage Report

Created: 2025-08-26 06:52

/src/astc-encoder/Source/astcenc_vecmathlib_common_4.h
Line
Count
Source (jump to first uncovered line)
1
// SPDX-License-Identifier: Apache-2.0
2
// ----------------------------------------------------------------------------
3
// Copyright 2020-2025 Arm Limited
4
//
5
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
6
// use this file except in compliance with the License. You may obtain a copy
7
// of the License at:
8
//
9
//     http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing, software
12
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14
// License for the specific language governing permissions and limitations
15
// under the License.
16
// ----------------------------------------------------------------------------
17
18
/**
19
 * @brief Generic 4x32-bit vector functions.
20
 *
21
 * This module implements generic 4-wide vector functions that are valid for
22
 * all instruction sets, typically implemented using lower level 4-wide
23
 * operations that are ISA-specific.
24
 */
25
26
#ifndef ASTC_VECMATHLIB_COMMON_4_H_INCLUDED
27
#define ASTC_VECMATHLIB_COMMON_4_H_INCLUDED
28
29
#ifndef ASTCENC_SIMD_INLINE
30
  #error "Include astcenc_vecmathlib.h, do not include directly"
31
#endif
32
33
#include <cassert>
#include <cstddef>
#include <cstdio>
#include <cstring>
#include <limits>
35
36
// ============================================================================
37
// vint4 operators and functions
38
// ============================================================================
39
40
/**
41
 * @brief Overload: vector by scalar addition.
42
 */
43
ASTCENC_SIMD_INLINE vint4 operator+(vint4 a, int b)
44
0
{
45
0
  return a + vint4(b);
46
0
}
47
48
/**
49
 * @brief Overload: vector by vector incremental addition.
50
 */
51
ASTCENC_SIMD_INLINE vint4& operator+=(vint4& a, const vint4& b)
52
0
{
53
0
  a = a + b;
54
0
  return a;
55
0
}
56
57
/**
58
 * @brief Overload: vector by scalar subtraction.
59
 */
60
ASTCENC_SIMD_INLINE vint4 operator-(vint4 a, int b)
61
0
{
62
0
  return a - vint4(b);
63
0
}
64
65
/**
66
 * @brief Overload: vector by scalar multiplication.
67
 */
68
ASTCENC_SIMD_INLINE vint4 operator*(vint4 a, int b)
69
0
{
70
0
  return a * vint4(b);
71
0
}
72
73
/**
74
 * @brief Overload: vector by scalar bitwise or.
75
 */
76
ASTCENC_SIMD_INLINE vint4 operator|(vint4 a, int b)
77
0
{
78
0
  return a | vint4(b);
79
0
}
80
81
/**
82
 * @brief Overload: vector by scalar bitwise and.
83
 */
84
ASTCENC_SIMD_INLINE vint4 operator&(vint4 a, int b)
85
0
{
86
0
  return a & vint4(b);
87
0
}
88
89
/**
90
 * @brief Overload: vector by scalar bitwise xor.
91
 */
92
ASTCENC_SIMD_INLINE vint4 operator^(vint4 a, int b)
93
0
{
94
0
  return a ^ vint4(b);
95
0
}
96
97
/**
98
 * @brief Return the clamped value between min and max.
99
 */
100
ASTCENC_SIMD_INLINE vint4 clamp(int minv, int maxv, vint4 a)
101
0
{
102
0
  return min(max(a, vint4(minv)), vint4(maxv));
103
0
}
104
105
/**
106
 * @brief Return the horizontal sum of RGB vector lanes as a scalar.
107
 */
108
ASTCENC_SIMD_INLINE int hadd_rgb_s(vint4 a)
109
0
{
110
0
  return a.lane<0>() + a.lane<1>() + a.lane<2>();
111
0
}
112
113
/**
114
 * @brief Return the horizontal minimum of a vector.
115
 */
116
ASTCENC_SIMD_INLINE int hmin_s(vint4 a)
117
0
{
118
0
  return hmin(a).lane<0>();
119
0
}
120
121
/**
122
 * @brief Generate a vint4 from a size_t.
123
 */
124
 ASTCENC_SIMD_INLINE vint4 vint4_from_size(size_t a)
125
0
 {
126
0
  assert(a <= std::numeric_limits<int>::max());
127
0
  return vint4(static_cast<int>(a));
128
0
 }
129
130
/**
131
 * @brief Return the horizontal maximum of a vector.
132
 */
133
ASTCENC_SIMD_INLINE int hmax_s(vint4 a)
134
0
{
135
0
  return hmax(a).lane<0>();
136
0
}
137
138
// ============================================================================
139
// vfloat4 operators and functions
140
// ============================================================================
141
142
/**
143
 * @brief Overload: vector by vector incremental addition.
144
 */
145
ASTCENC_SIMD_INLINE vfloat4& operator+=(vfloat4& a, const vfloat4& b)
146
0
{
147
0
  a = a + b;
148
0
  return a;
149
0
}
150
151
/**
152
 * @brief Overload: vector by scalar addition.
153
 */
154
ASTCENC_SIMD_INLINE vfloat4 operator+(vfloat4 a, float b)
155
0
{
156
0
  return a + vfloat4(b);
157
0
}
158
159
/**
160
 * @brief Overload: vector by scalar subtraction.
161
 */
162
ASTCENC_SIMD_INLINE vfloat4 operator-(vfloat4 a, float b)
163
0
{
164
0
  return a - vfloat4(b);
165
0
}
166
167
/**
168
 * @brief Overload: vector by scalar multiplication.
169
 */
170
ASTCENC_SIMD_INLINE vfloat4 operator*(vfloat4 a, float b)
171
0
{
172
0
  return a * vfloat4(b);
173
0
}
174
175
/**
176
 * @brief Overload: scalar by vector multiplication.
177
 */
178
ASTCENC_SIMD_INLINE vfloat4 operator*(float a, vfloat4 b)
179
0
{
180
0
  return vfloat4(a) * b;
181
0
}
182
183
/**
184
 * @brief Overload: vector by scalar division.
185
 */
186
ASTCENC_SIMD_INLINE vfloat4 operator/(vfloat4 a, float b)
187
0
{
188
0
  return a / vfloat4(b);
189
0
}
190
191
/**
192
 * @brief Overload: scalar by vector division.
193
 */
194
ASTCENC_SIMD_INLINE vfloat4 operator/(float a, vfloat4 b)
195
0
{
196
0
  return vfloat4(a) / b;
197
0
}
198
199
/**
200
 * @brief Return the min vector of a vector and a scalar.
201
 *
202
 * If either lane value is NaN, @c b will be returned for that lane.
203
 */
204
ASTCENC_SIMD_INLINE vfloat4 min(vfloat4 a, float b)
205
0
{
206
0
  return min(a, vfloat4(b));
207
0
}
208
209
/**
210
 * @brief Return the max vector of a vector and a scalar.
211
 *
212
 * If either lane value is NaN, @c b will be returned for that lane.
213
 */
214
ASTCENC_SIMD_INLINE vfloat4 max(vfloat4 a, float b)
215
0
{
216
0
  return max(a, vfloat4(b));
217
0
}
218
219
/**
220
 * @brief Return the clamped value between min and max.
221
 *
222
 * It is assumed that neither @c min nor @c max are NaN values. If @c a is NaN
223
 * then @c min will be returned for that lane.
224
 */
225
ASTCENC_SIMD_INLINE vfloat4 clamp(float minv, float maxv, vfloat4 a)
226
0
{
227
0
  // Do not reorder - second operand will return if either is NaN
228
0
  return min(max(a, minv), maxv);
229
0
}
230
231
/**
232
 * @brief Return the clamped value between 0.0f and 1.0f.
233
 *
234
 * If @c a is NaN then zero will be returned for that lane.
235
 */
236
ASTCENC_SIMD_INLINE vfloat4 clampzo(vfloat4 a)
237
0
{
238
0
  // Do not reorder - second operand will return if either is NaN
239
0
  return min(max(a, vfloat4::zero()), 1.0f);
240
0
}
241
242
/**
243
 * @brief Return the horizontal minimum of a vector.
244
 */
245
ASTCENC_SIMD_INLINE float hmin_s(vfloat4 a)
246
0
{
247
0
  return hmin(a).lane<0>();
248
0
}
249
250
/**
251
 * @brief Return the horizontal min of RGB vector lanes as a scalar.
252
 */
253
ASTCENC_SIMD_INLINE float hmin_rgb_s(vfloat4 a)
254
0
{
255
0
  a.set_lane<3>(a.lane<0>());
256
0
  return hmin_s(a);
257
0
}
258
259
/**
260
 * @brief Return the horizontal maximum of a vector.
261
 */
262
ASTCENC_SIMD_INLINE float hmax_s(vfloat4 a)
263
0
{
264
0
  return hmax(a).lane<0>();
265
0
}
266
267
/**
268
 * @brief Accumulate lane-wise sums for a vector.
269
 */
270
ASTCENC_SIMD_INLINE void haccumulate(vfloat4& accum, vfloat4 a)
271
0
{
272
0
  accum = accum + a;
273
0
}
274
275
/**
276
 * @brief Accumulate lane-wise sums for a masked vector.
277
 */
278
ASTCENC_SIMD_INLINE void haccumulate(vfloat4& accum, vfloat4 a, vmask4 m)
279
0
{
280
0
  a = select(vfloat4::zero(), a, m);
281
0
  haccumulate(accum, a);
282
0
}
283
284
/**
285
 * @brief Return the horizontal sum of RGB vector lanes as a scalar.
286
 */
287
ASTCENC_SIMD_INLINE float hadd_rgb_s(vfloat4 a)
288
0
{
289
0
  return a.lane<0>() + a.lane<1>() + a.lane<2>();
290
0
}
291
292
#if !defined(ASTCENC_USE_NATIVE_DOT_PRODUCT)
293
294
/**
295
 * @brief Return the dot product for the full 4 lanes, returning scalar.
296
 */
297
ASTCENC_SIMD_INLINE float dot_s(vfloat4 a, vfloat4 b)
298
0
{
299
0
  vfloat4 m = a * b;
300
0
  return hadd_s(m);
301
0
}
302
303
/**
304
 * @brief Return the dot product for the full 4 lanes, returning vector.
305
 */
306
ASTCENC_SIMD_INLINE vfloat4 dot(vfloat4 a, vfloat4 b)
307
0
{
308
0
  vfloat4 m = a * b;
309
0
  return vfloat4(hadd_s(m));
310
0
}
311
312
/**
313
 * @brief Return the dot product for the bottom 3 lanes, returning scalar.
314
 */
315
ASTCENC_SIMD_INLINE float dot3_s(vfloat4 a, vfloat4 b)
316
0
{
317
0
  vfloat4 m = a * b;
318
0
  return hadd_rgb_s(m);
319
0
}
320
321
/**
322
 * @brief Return the dot product for the bottom 3 lanes, returning vector.
323
 */
324
ASTCENC_SIMD_INLINE vfloat4 dot3(vfloat4 a, vfloat4 b)
325
0
{
326
0
  vfloat4 m = a * b;
327
0
  float d3 = hadd_rgb_s(m);
328
0
  return vfloat4(d3, d3, d3, 0.0f);
329
0
}
330
331
#endif
332
333
#if !defined(ASTCENC_USE_NATIVE_POPCOUNT)
334
335
/**
 * @brief Population bit count.
 *
 * Classic SWAR reduction: pair-wise, nibble-wise, then byte-wise partial
 * counts, with a multiply to sum the per-byte counts into the top byte.
 *
 * @param v   The value to population count.
 *
 * @return The number of 1 bits.
 */
static inline int popcount(uint64_t v)
{
  // Sum adjacent bit pairs into 2-bit counts
  v = v - ((v >> 1) & 0x5555555555555555ULL);
  // Sum adjacent 2-bit counts into 4-bit counts
  v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL);
  // Sum adjacent 4-bit counts into 8-bit counts
  v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
  // Multiply accumulates all byte counts into the most significant byte
  return static_cast<int>((v * 0x0101010101010101ULL) >> 56);
}
Unexecuted instantiation: fuzz_astc_physical_to_symbolic.cpp:popcount(unsigned long)
Unexecuted instantiation: astcenc_block_sizes.cpp:popcount(unsigned long)
Unexecuted instantiation: astcenc_integer_sequence.cpp:popcount(unsigned long)
Unexecuted instantiation: astcenc_mathlib.cpp:popcount(unsigned long)
Unexecuted instantiation: astcenc_partition_tables.cpp:popcount(unsigned long)
Unexecuted instantiation: astcenc_percentile_tables.cpp:popcount(unsigned long)
Unexecuted instantiation: astcenc_symbolic_physical.cpp:popcount(unsigned long)
Unexecuted instantiation: astcenc_weight_quant_xfer_tables.cpp:popcount(unsigned long)
Unexecuted instantiation: astcenc_quantization.cpp:popcount(unsigned long)
355
356
#endif
357
358
/**
 * @brief Apply signed bit transfer.
 *
 * Moves the top bit (0x80) of @c input0 into the vacated top bit of a
 * right-shifted @c input1, then reduces @c input0 to a sign-extended 6-bit
 * value. NOTE(review): this matches the ASTC encoded-endpoint bit-transfer
 * step; confirm intended semantics against the ASTC specification.
 *
 * @param input0   The first encoded endpoint; on return holds the 6-bit
 *                 value sign-extended into a full-width int per lane.
 * @param input1   The second encoded endpoint; on return holds the value
 *                 shifted right by one with bit 7 taken from @c input0.
 */
static ASTCENC_SIMD_INLINE void bit_transfer_signed(
  vint4& input0,
  vint4& input1
) {
  // Shift input1 down one bit and splice in bit 7 of input0
  input1 = lsr<1>(input1) | (input0 & 0x80);
  // Shift input0 down one bit and keep only the low 6 bits
  input0 = lsr<1>(input0) & 0x3F;

  // Sign extend: lanes with bit 5 (0x20) set are negative 6-bit values,
  // so subtract 0x40 to produce the equivalent negative int
  vmask4 mask = (input0 & 0x20) != vint4::zero();
  input0 = select(input0, input0 - 0x40, mask);
}
Unexecuted instantiation: fuzz_astc_physical_to_symbolic.cpp:bit_transfer_signed(vint4&, vint4&)
Unexecuted instantiation: astcenc_block_sizes.cpp:bit_transfer_signed(vint4&, vint4&)
Unexecuted instantiation: astcenc_integer_sequence.cpp:bit_transfer_signed(vint4&, vint4&)
Unexecuted instantiation: astcenc_mathlib.cpp:bit_transfer_signed(vint4&, vint4&)
Unexecuted instantiation: astcenc_partition_tables.cpp:bit_transfer_signed(vint4&, vint4&)
Unexecuted instantiation: astcenc_percentile_tables.cpp:bit_transfer_signed(vint4&, vint4&)
Unexecuted instantiation: astcenc_symbolic_physical.cpp:bit_transfer_signed(vint4&, vint4&)
Unexecuted instantiation: astcenc_weight_quant_xfer_tables.cpp:bit_transfer_signed(vint4&, vint4&)
Unexecuted instantiation: astcenc_quantization.cpp:bit_transfer_signed(vint4&, vint4&)
374
375
/**
376
 * @brief Debug function to print a vector of ints.
377
 */
378
ASTCENC_SIMD_INLINE void print(vint4 a)
379
0
{
380
0
  ASTCENC_ALIGNAS int v[4];
381
0
  storea(a, v);
382
0
  printf("v4_i32:\n  %8d %8d %8d %8d\n",
383
0
         v[0], v[1], v[2], v[3]);
384
0
}
385
386
/**
 * @brief Debug function to print a vector of ints in hexadecimal.
 */
ASTCENC_SIMD_INLINE void printx(vint4 a)
{
  // Spill to an aligned buffer so the lanes can be passed to printf
  ASTCENC_ALIGNAS int v[4];
  storea(a, v);

  // Copy the bits into unsigned storage; the %x conversion requires
  // unsigned arguments, and memcpy avoids implementation-defined casts
  // NOTE(review): std::memcpy needs <cstring>, which this header does not
  // include directly — currently satisfied transitively; confirm
  unsigned int uv[4];
  std::memcpy(uv, v, sizeof(int) * 4);

  printf("v4_i32:\n  %08x %08x %08x %08x\n",
    uv[0], uv[1], uv[2], uv[3]);
}
400
401
/**
402
 * @brief Debug function to print a vector of floats.
403
 */
404
ASTCENC_SIMD_INLINE void print(vfloat4 a)
405
0
{
406
0
  ASTCENC_ALIGNAS float v[4];
407
0
  storea(a, v);
408
0
  printf("v4_f32:\n  %0.4f %0.4f %0.4f %0.4f\n",
409
0
         static_cast<double>(v[0]), static_cast<double>(v[1]),
410
0
         static_cast<double>(v[2]), static_cast<double>(v[3]));
411
0
}
412
413
/**
414
 * @brief Debug function to print a vector of masks.
415
 */
416
ASTCENC_SIMD_INLINE void print(vmask4 a)
417
0
{
418
0
  print(select(vint4(0), vint4(1), a));
419
0
}
420
421
#endif // #ifndef ASTC_VECMATHLIB_COMMON_4_H_INCLUDED