Coverage Report

Created: 2021-08-22 09:07

/src/skia/src/gpu/GrVx.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright 2020 Google LLC.
3
 *
4
 * Use of this source code is governed by a BSD-style license that can be
5
 * found in the LICENSE file.
6
 */
7
8
#ifndef GrVx_DEFINED
9
#define GrVx_DEFINED
10
11
#include "include/core/SkTypes.h"
12
#include "include/private/SkVx.h"
13
14
// grvx is Ganesh's addendum to skvx, Skia's SIMD library. Here we introduce functions that are
15
// approximate and/or have LSB differences from platform to platform (e.g., by using hardware FMAs
16
// when available). When a function is approximate, its error range is well documented and tested.
17
namespace grvx {
18
19
// Allow floating point contraction. e.g., allow a*x + y to be compiled to a single FMA even though
20
// the fused result can differ in the LSBs from platforms that don't have an FMA instruction.
21
#if defined(__clang__)
22
    #pragma STDC FP_CONTRACT ON
23
#endif
24
25
// Use familiar type names and functions from SkSL and GLSL.
26
// N-lane vector of 32-bit floats, plus the 2- and 4-lane shorthands.
template<int N> using vec = skvx::Vec<N, float>;
27
using float2 = vec<2>;
28
using float4 = vec<4>;
29
30
// N-lane vector of signed 32-bit integers, plus the 2- and 4-lane shorthands.
template<int N> using ivec = skvx::Vec<N, int32_t>;
31
using int2 = ivec<2>;
32
using int4 = ivec<4>;
33
34
// N-lane vector of unsigned 32-bit integers, plus the 2- and 4-lane shorthands.
template<int N> using uvec = skvx::Vec<N, uint32_t>;
35
using uint2 = uvec<2>;
36
using uint4 = uvec<4>;
37
38
0
// Returns the 2D dot product of a and b, i.e. a[0]*b[0] + a[1]*b[1].
static SK_ALWAYS_INLINE float dot(float2 a, float2 b) {
39
0
    // Lane-wise product; the two lanes are then summed horizontally.
    float2 ab = a*b;
40
0
    return ab[0] + ab[1];
41
0
}
Unexecuted instantiation: AAConvexPathRenderer.cpp:grvx::dot(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: AAHairLinePathRenderer.cpp:grvx::dot(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: AALinearizingConvexPathRenderer.cpp:grvx::dot(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: DefaultPathRenderer.cpp:grvx::dot(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: TriangulatingPathRenderer.cpp:grvx::dot(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: SkShadowTessellator.cpp:grvx::dot(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: GrDistanceFieldGenFromVector.cpp:grvx::dot(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: GrAAConvexTessellator.cpp:grvx::dot(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: GrPathUtils.cpp:grvx::dot(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: GrTriangulator.cpp:grvx::dot(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: AtlasPathRenderer.cpp:grvx::dot(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: GrFillRRectOp.cpp:grvx::dot(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: TessellationPathRenderer.cpp:grvx::dot(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: GrAtlasRenderTask.cpp:grvx::dot(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: PathInnerTriangulateOp.cpp:grvx::dot(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: PathStencilCoverOp.cpp:grvx::dot(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: PathTessellateOp.cpp:grvx::dot(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: StrokeTessellateOp.cpp:grvx::dot(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: GrPathCurveTessellator.cpp:grvx::dot(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: GrPathWedgeTessellator.cpp:grvx::dot(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: GrStrokeFixedCountTessellator.cpp:grvx::dot(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: GrStrokeHardwareTessellator.cpp:grvx::dot(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: GrPathTessellationShader_Hardware.cpp:grvx::dot(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: GrPathTessellationShader_MiddleOut.cpp:grvx::dot(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: GrStrokeTessellationShader.cpp:grvx::dot(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: GrStrokeTessellationShader_HardwareImpl.cpp:grvx::dot(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: GrStrokeTessellationShader_InstancedImpl.cpp:grvx::dot(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: FuzzTriangulation.cpp:grvx::dot(skvx::Vec<2, float>, skvx::Vec<2, float>)
42
43
0
// Returns the scalar 2D cross product of a and b, i.e. a[0]*b[1] - a[1]*b[0].
static SK_ALWAYS_INLINE float cross(float2 a, float2 b) {
44
0
    // NOTE(review): relies on skvx::shuffle<1,0>(b) yielding (b[1], b[0]), so that
    // x = (a[0]*b[1], a[1]*b[0]) -- confirm against SkVx.h.
    float2 x = a*skvx::shuffle<1,0>(b);
45
0
    return x[0] - x[1];
46
0
}
Unexecuted instantiation: AAConvexPathRenderer.cpp:grvx::cross(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: AAHairLinePathRenderer.cpp:grvx::cross(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: AALinearizingConvexPathRenderer.cpp:grvx::cross(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: DefaultPathRenderer.cpp:grvx::cross(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: TriangulatingPathRenderer.cpp:grvx::cross(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: SkShadowTessellator.cpp:grvx::cross(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: GrDistanceFieldGenFromVector.cpp:grvx::cross(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: GrAAConvexTessellator.cpp:grvx::cross(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: GrPathUtils.cpp:grvx::cross(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: GrTriangulator.cpp:grvx::cross(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: AtlasPathRenderer.cpp:grvx::cross(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: GrFillRRectOp.cpp:grvx::cross(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: TessellationPathRenderer.cpp:grvx::cross(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: GrAtlasRenderTask.cpp:grvx::cross(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: PathInnerTriangulateOp.cpp:grvx::cross(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: PathStencilCoverOp.cpp:grvx::cross(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: PathTessellateOp.cpp:grvx::cross(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: StrokeTessellateOp.cpp:grvx::cross(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: GrPathCurveTessellator.cpp:grvx::cross(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: GrPathWedgeTessellator.cpp:grvx::cross(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: GrStrokeFixedCountTessellator.cpp:grvx::cross(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: GrStrokeHardwareTessellator.cpp:grvx::cross(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: GrPathTessellationShader_Hardware.cpp:grvx::cross(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: GrPathTessellationShader_MiddleOut.cpp:grvx::cross(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: GrStrokeTessellationShader.cpp:grvx::cross(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: GrStrokeTessellationShader_HardwareImpl.cpp:grvx::cross(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: GrStrokeTessellationShader_InstancedImpl.cpp:grvx::cross(skvx::Vec<2, float>, skvx::Vec<2, float>)
Unexecuted instantiation: FuzzTriangulation.cpp:grvx::cross(skvx::Vec<2, float>, skvx::Vec<2, float>)
47
48
// Returns f*m + a. The actual implementation may or may not be fused, depending on hardware
49
// support. We call this method "fast_madd" to draw attention to the fact that the operation may
50
// give different results on different platforms.
51
122k
// Lane-wise multiply-add: returns f*m + a for each of the N lanes.
//
// f, m: the two factors.
// a:    the addend.
template<int N> SK_ALWAYS_INLINE vec<N> fast_madd(vec<N> f, vec<N> m, vec<N> a) {
52
// FP_FAST_FMAF is the <cmath>/<math.h> macro a platform defines when a float fma is expected to
// be about as fast as a separate multiply and add. Only then do we request the fused form.
#if FP_FAST_FMAF
53
    return skvx::fma(f,m,a);
54
#else
55
122k
    // Plain multiply then add. Under the FP_CONTRACT pragma earlier in this namespace the compiler
    // is still allowed to contract this expression into an FMA.
    return f*m + a;
56
122k
#endif
57
122k
}
skvx::Vec<2, float> grvx::fast_madd<2>(skvx::Vec<2, float>, skvx::Vec<2, float>, skvx::Vec<2, float>)
Line
Count
Source
51
106k
template<int N> SK_ALWAYS_INLINE vec<N> fast_madd(vec<N> f, vec<N> m, vec<N> a) {
52
#if FP_FAST_FMAF
53
    return skvx::fma(f,m,a);
54
#else
55
106k
    return f*m + a;
56
106k
#endif
57
106k
}
skvx::Vec<4, float> grvx::fast_madd<4>(skvx::Vec<4, float>, skvx::Vec<4, float>, skvx::Vec<4, float>)
Line
Count
Source
51
15.3k
template<int N> SK_ALWAYS_INLINE vec<N> fast_madd(vec<N> f, vec<N> m, vec<N> a) {
52
#if FP_FAST_FMAF
53
    return skvx::fma(f,m,a);
54
#else
55
15.3k
    return f*m + a;
56
15.3k
#endif
57
15.3k
}
58
59
// Approximates the inverse cosine of x within 0.96 degrees using the rational polynomial:
60
//
61
//     acos(x) ~= (bx^3 + ax) / (dx^4 + cx^2 + 1) + pi/2
62
//
63
// See: https://stackoverflow.com/a/36387954
64
//
65
// For a proof of max error, see the "grvx_approx_acos" unit test.
66
//
67
// NOTE: This function deviates immediately from pi and 0 outside -1 and 1. (The derivatives are
68
// infinite at -1 and 1). So the input must still be clamped between -1 and 1.
69
0
// Documented maximum error of approx_acos: .96 degrees, converted to radians.
#define GRVX_APPROX_ACOS_MAX_ERROR SkDegreesToRadians(.96f)
70
0
// Lane-wise approximate acos(x), returned in radians (the result is offset by pi/2).
//
// x: cosine values; per the note above, the caller must already have clamped them to [-1, 1].
//
// The fast_madd calls pass <N> explicitly so that the scalar coefficients (and the literal 1) can
// be supplied directly where a vec<N> is expected.
template<int N> SK_ALWAYS_INLINE vec<N> approx_acos(vec<N> x) {
71
0
    // Coefficients a, b, c, d of the rational polynomial given in the comment above.
    constexpr static float a = -0.939115566365855f;
72
0
    constexpr static float b =  0.9217841528914573f;
73
0
    constexpr static float c = -1.2845906244690837f;
74
0
    constexpr static float d =  0.295624144969963174f;
75
0
    constexpr static float pi_over_2 = 1.5707963267948966f;
76
0
    vec<N> xx = x*x;
77
0
    // numer = b*x^2 + a. The remaining factor of x (giving bx^3 + ax) is applied in the return.
    vec<N> numer = fast_madd<N>(b,xx,a);
78
0
    // denom = x^2 * (d*x^2 + c) + 1 = dx^4 + cx^2 + 1.
    vec<N> denom = fast_madd<N>(xx, fast_madd<N>(d,xx,c), 1);
79
0
    // x * (numer/denom) + pi/2.
    return fast_madd<N>(x, numer/denom, pi_over_2);
80
0
}
81
82
// Approximates the angle between vectors a and b within .96 degrees (GRVX_APPROX_ACOS_MAX_ERROR).
83
// a (and b) represent "N" (Nx2/2) 2d vectors in SIMD, with the x values found in a.lo, and the
84
// y values in a.hi.
85
//
86
// Due to fp32 overflow, this method is only valid for magnitudes in the range (2^-31, 2^31)
87
// exclusive. Results are undefined if the inputs fall outside this range.
88
//
89
// NOTE: If necessary, we can extend our valid range to 2^(+/-63) by normalizing a and b separately.
90
// i.e.: "cosTheta = dot(a,b) / sqrt(dot(a,a)) / sqrt(dot(b,b))".
91
// Returns, per 2d vector pair, approx_acos of the clamped cosine between a and b.
//
// a, b: Nx2 lanes each, holding Nx2/2 2d vectors; the .lo half is combined with the .hi half
//       lane-by-lane (x values in .lo, y values in .hi, as described above).
template<int Nx2>
92
SK_ALWAYS_INLINE vec<Nx2/2> approx_angle_between_vectors(vec<Nx2> a, vec<Nx2> b) {
93
    // Lane-wise squares/products; adding .lo and .hi below turns these into dot(a,a), dot(b,b)
    // and dot(a,b) for each 2d vector.
    auto aa=a*a, bb=b*b, ab=a*b;
94
    // cosTheta = dot(a,b) / sqrt(dot(a,a) * dot(b,b)).
    auto cosTheta = (ab.lo + ab.hi) / skvx::sqrt((aa.lo + aa.hi) * (bb.lo + bb.hi));
95
    // Clamp cosTheta such that if it is NaN (e.g., if a or b was 0), then we return acos(1) = 0.
96
    cosTheta = skvx::max(skvx::min(1, cosTheta), -1);
97
    return approx_acos(cosTheta);
98
}
99
100
// De-interleaving load of 4 vectors.
101
//
102
// WARNING: These are really only supported well on NEON. Consider restructuring your data before
103
// resorting to these methods.
104
// Base case for single-lane vectors: reads one group of four consecutive values from v and
// stores them into a, b, c and d respectively.
//
// v: must point to at least 4 readable elements (v[0]..v[3] are accessed).
template<typename T>
105
SK_ALWAYS_INLINE void strided_load4(const T* v, skvx::Vec<1,T>& a, skvx::Vec<1,T>& b,
106
                                    skvx::Vec<1,T>& c, skvx::Vec<1,T>& d) {
107
    a.val = v[0];
108
    b.val = v[1];
109
    c.val = v[2];
110
    d.val = v[3];
111
}
112
// Generic case for N >= 2 lanes (selected via enable_if): fills the outputs by recursing on
// their .lo and .hi halves.
//
// v: interleaved input; the .lo halves are loaded starting at v and the .hi halves starting
//    4*(N/2) elements later.
template<int N, typename T>
113
SK_ALWAYS_INLINE typename std::enable_if<N >= 2, void>::type
114
strided_load4(const T* v, skvx::Vec<N,T>& a, skvx::Vec<N,T>& b, skvx::Vec<N,T>& c,
115
              skvx::Vec<N,T>& d) {
116
    strided_load4(v, a.lo, b.lo, c.lo, d.lo);
117
    // Skip the 4 values per lane that the low halves consumed.
    strided_load4(v + 4*(N/2), a.hi, b.hi, c.hi, d.hi);
118
}
119
#if !defined(SKNX_NO_SIMD)
120
#if defined(__ARM_NEON)
121
// NEON only: stamps out a full specialization of strided_load4 for Vec<N,T>.
//
// N, T: lane count and element type of the vectors being specialized.
// VLD:  the load routine to call on v; its result's val[0..3] are bit-punned into a, b, c, d.
//       NOTE(review): the vld4* names are presumably the NEON de-interleaving load intrinsics
//       -- confirm against arm_neon.h.
#define IMPL_LOAD4_TRANSPOSED(N, T, VLD) \
122
template<> \
123
SK_ALWAYS_INLINE void strided_load4(const T* v, skvx::Vec<N,T>& a, skvx::Vec<N,T>& b, \
124
                                    skvx::Vec<N,T>& c, skvx::Vec<N,T>& d) { \
125
    auto mat = VLD(v); \
126
    a = skvx::bit_pun<skvx::Vec<N,T>>(mat.val[0]); \
127
    b = skvx::bit_pun<skvx::Vec<N,T>>(mat.val[1]); \
128
    c = skvx::bit_pun<skvx::Vec<N,T>>(mat.val[2]); \
129
    d = skvx::bit_pun<skvx::Vec<N,T>>(mat.val[3]); \
130
}
131
// 8-byte vectors (N * sizeof(T) == 8), loaded with the vld4_* variants.
IMPL_LOAD4_TRANSPOSED(2, uint32_t, vld4_u32);
132
IMPL_LOAD4_TRANSPOSED(4, uint16_t, vld4_u16);
133
IMPL_LOAD4_TRANSPOSED(8, uint8_t, vld4_u8);
134
IMPL_LOAD4_TRANSPOSED(2, int32_t, vld4_s32);
135
IMPL_LOAD4_TRANSPOSED(4, int16_t, vld4_s16);
136
IMPL_LOAD4_TRANSPOSED(8, int8_t, vld4_s8);
137
IMPL_LOAD4_TRANSPOSED(2, float, vld4_f32);
138
// 16-byte vectors (N * sizeof(T) == 16), loaded with the vld4q_* variants.
IMPL_LOAD4_TRANSPOSED(4, uint32_t, vld4q_u32);
139
IMPL_LOAD4_TRANSPOSED(8, uint16_t, vld4q_u16);
140
IMPL_LOAD4_TRANSPOSED(16, uint8_t, vld4q_u8);
141
IMPL_LOAD4_TRANSPOSED(4, int32_t, vld4q_s32);
142
IMPL_LOAD4_TRANSPOSED(8, int16_t, vld4q_s16);
143
IMPL_LOAD4_TRANSPOSED(16, int8_t, vld4q_s8);
144
IMPL_LOAD4_TRANSPOSED(4, float, vld4q_f32);
145
#undef IMPL_LOAD4_TRANSPOSED
146
#elif defined(__SSE__)
147
// SSE specialization for four float4 outputs.
//
// v: must point to at least 16 readable floats; no alignment is required because the
//    unaligned load (_mm_loadu_ps) is used.
template<>
148
0
SK_ALWAYS_INLINE void strided_load4(const float* v, float4& a, float4& b, float4& c, float4& d) {
149
0
    using skvx::bit_pun;
150
0
    // Load four consecutive rows of 4 floats each.
    __m128 a_ = _mm_loadu_ps(v);
151
0
    __m128 b_ = _mm_loadu_ps(v+4);
152
0
    __m128 c_ = _mm_loadu_ps(v+8);
153
0
    __m128 d_ = _mm_loadu_ps(v+12);
154
0
    // Transpose the 4x4 in place, so a_ ends up with v[0], v[4], v[8], v[12], and so on.
    _MM_TRANSPOSE4_PS(a_, b_, c_, d_);
155
0
    a = bit_pun<float4>(a_);
156
0
    b = bit_pun<float4>(b_);
157
0
    c = bit_pun<float4>(c_);
158
0
    d = bit_pun<float4>(d_);
159
0
}
160
#endif
161
#endif
162
163
// De-interleaving load of 2 vectors.
164
//
165
// WARNING: These are really only supported well on NEON. Consider restructuring your data before
166
// resorting to these methods.
167
// Base case for single-lane vectors: reads one pair of consecutive values from v and stores
// them into a and b respectively.
//
// v: must point to at least 2 readable elements (v[0] and v[1] are accessed).
template<typename T>
168
0
SK_ALWAYS_INLINE void strided_load2(const T* v, skvx::Vec<1,T>& a, skvx::Vec<1,T>& b) {
169
0
    a.val = v[0];
170
0
    b.val = v[1];
171
0
}
172
// Generic case for N >= 2 lanes (selected via enable_if): fills the outputs by recursing on
// their .lo and .hi halves.
//
// v: interleaved input; the .lo halves are loaded starting at v and the .hi halves starting
//    2*(N/2) elements later.
template<int N, typename T>
173
SK_ALWAYS_INLINE typename std::enable_if<N >= 2, void>::type
174
0
strided_load2(const T* v, skvx::Vec<N,T>& a, skvx::Vec<N,T>& b) {
175
0
    strided_load2(v, a.lo, b.lo);
176
0
    // Skip the 2 values per lane that the low halves consumed.
    strided_load2(v + 2*(N/2), a.hi, b.hi);
177
0
}
Unexecuted instantiation: std::__1::enable_if<(4)>=(2), void>::type grvx::strided_load2<4, float>(float const*, skvx::Vec<4, float>&, skvx::Vec<4, float>&)
Unexecuted instantiation: std::__1::enable_if<(2)>=(2), void>::type grvx::strided_load2<2, float>(float const*, skvx::Vec<2, float>&, skvx::Vec<2, float>&)
178
#if !defined(SKNX_NO_SIMD)
179
#if defined(__ARM_NEON)
180
// NEON only: stamps out a full specialization of strided_load2 for Vec<N,T>.
//
// N, T: lane count and element type of the vectors being specialized.
// VLD:  the load routine to call on v; its result's val[0] and val[1] are bit-punned into a and b.
//       NOTE(review): the vld2* names are presumably the NEON de-interleaving load intrinsics
//       -- confirm against arm_neon.h.
#define IMPL_LOAD2_TRANSPOSED(N, T, VLD) \
181
template<> \
182
SK_ALWAYS_INLINE void strided_load2(const T* v, skvx::Vec<N,T>& a, skvx::Vec<N,T>& b) { \
183
    auto mat = VLD(v); \
184
    a = skvx::bit_pun<skvx::Vec<N,T>>(mat.val[0]); \
185
    b = skvx::bit_pun<skvx::Vec<N,T>>(mat.val[1]); \
186
}
187
// 8-byte vectors (N * sizeof(T) == 8), loaded with the vld2_* variants.
IMPL_LOAD2_TRANSPOSED(2, uint32_t, vld2_u32);
188
IMPL_LOAD2_TRANSPOSED(4, uint16_t, vld2_u16);
189
IMPL_LOAD2_TRANSPOSED(8, uint8_t, vld2_u8);
190
IMPL_LOAD2_TRANSPOSED(2, int32_t, vld2_s32);
191
IMPL_LOAD2_TRANSPOSED(4, int16_t, vld2_s16);
192
IMPL_LOAD2_TRANSPOSED(8, int8_t, vld2_s8);
193
IMPL_LOAD2_TRANSPOSED(2, float, vld2_f32);
194
// 16-byte vectors (N * sizeof(T) == 16), loaded with the vld2q_* variants.
IMPL_LOAD2_TRANSPOSED(4, uint32_t, vld2q_u32);
195
IMPL_LOAD2_TRANSPOSED(8, uint16_t, vld2q_u16);
196
IMPL_LOAD2_TRANSPOSED(16, uint8_t, vld2q_u8);
197
IMPL_LOAD2_TRANSPOSED(4, int32_t, vld2q_s32);
198
IMPL_LOAD2_TRANSPOSED(8, int16_t, vld2q_s16);
199
IMPL_LOAD2_TRANSPOSED(16, int8_t, vld2q_s8);
200
IMPL_LOAD2_TRANSPOSED(4, float, vld2q_f32);
201
#undef IMPL_LOAD2_TRANSPOSED
202
#endif
203
#endif
204
205
#if defined(__clang__)
206
    #pragma STDC FP_CONTRACT DEFAULT
207
#endif
208
209
};  // namespace grvx
210
211
#endif