/src/skia/src/opts/SkBlitMask_opts.h
Line | Count | Source |
1 | | /* |
2 | | * Copyright 2015 Google Inc. |
3 | | * |
4 | | * Use of this source code is governed by a BSD-style license that can be |
5 | | * found in the LICENSE file. |
6 | | */ |
7 | | |
8 | | #ifndef SkBlitMask_opts_DEFINED |
9 | | #define SkBlitMask_opts_DEFINED |
10 | | |
11 | | #include "include/private/base/SkFeatures.h" |
12 | | #include "src/core/Sk4px.h" |
13 | | |
14 | | #if defined(SK_ARM_HAS_NEON) |
15 | | #include <arm_neon.h> |
16 | | #endif |
17 | | |
18 | | namespace SK_OPTS_NS { |
19 | | |
20 | | #if defined(SK_ARM_HAS_NEON) |
// The Sk4px versions below will work fine with NEON, but we have had many indications
// that they don't perform as well as this NEON-specific code.  TODO(mtklein): why?
23 | | |
// Index of each colour channel inside a uint8x8x4_t produced by vld4_u8() on
// 32-bit pixels: the channel's bit shift within the pixel divided by 8 gives
// its byte position, which is the .val[] plane the de-interleaving load puts
// it in.
#define NEON_A (SK_A32_SHIFT / 8)
#define NEON_R (SK_R32_SHIFT / 8)
#define NEON_G (SK_G32_SHIFT / 8)
#define NEON_B (SK_B32_SHIFT / 8)
28 | | |
29 | | static inline uint16x8_t SkAlpha255To256_neon8(uint8x8_t alpha) { |
30 | | return vaddw_u8(vdupq_n_u16(1), alpha); |
31 | | } |
32 | | |
33 | | static inline uint8x8_t SkAlphaMul_neon8(uint8x8_t color, uint16x8_t scale) { |
34 | | return vshrn_n_u16(vmovl_u8(color) * scale, 8); |
35 | | } |
36 | | |
37 | | static inline uint8x8x4_t SkAlphaMulQ_neon8(uint8x8x4_t color, uint16x8_t scale) { |
38 | | uint8x8x4_t ret; |
39 | | |
40 | | ret.val[0] = SkAlphaMul_neon8(color.val[0], scale); |
41 | | ret.val[1] = SkAlphaMul_neon8(color.val[1], scale); |
42 | | ret.val[2] = SkAlphaMul_neon8(color.val[2], scale); |
43 | | ret.val[3] = SkAlphaMul_neon8(color.val[3], scale); |
44 | | |
45 | | return ret; |
46 | | } |
47 | | |
48 | | |
49 | | template <bool isColor> |
50 | | static void D32_A8_Opaque_Color_neon(void* SK_RESTRICT dst, size_t dstRB, |
51 | | const void* SK_RESTRICT maskPtr, size_t maskRB, |
52 | | SkColor color, int width, int height) { |
53 | | SkPMColor pmc = SkPreMultiplyColor(color); |
54 | | SkPMColor* SK_RESTRICT device = (SkPMColor*)dst; |
55 | | const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr; |
56 | | uint8x8x4_t vpmc; |
57 | | |
58 | | // Nine patch may set maskRB to 0 to blit the same row repeatedly. |
59 | | ptrdiff_t mask_adjust = (ptrdiff_t)maskRB - width; |
60 | | dstRB -= (width << 2); |
61 | | |
62 | | if (width >= 8) { |
63 | | vpmc.val[NEON_A] = vdup_n_u8(SkGetPackedA32(pmc)); |
64 | | vpmc.val[NEON_R] = vdup_n_u8(SkGetPackedR32(pmc)); |
65 | | vpmc.val[NEON_G] = vdup_n_u8(SkGetPackedG32(pmc)); |
66 | | vpmc.val[NEON_B] = vdup_n_u8(SkGetPackedB32(pmc)); |
67 | | } |
68 | | do { |
69 | | int w = width; |
70 | | while (w >= 8) { |
71 | | uint8x8_t vmask = vld1_u8(mask); |
72 | | uint16x8_t vscale, vmask256 = SkAlpha255To256_neon8(vmask); |
73 | | if (isColor) { |
74 | | vscale = vsubw_u8(vdupq_n_u16(256), |
75 | | SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256)); |
76 | | } else { |
77 | | vscale = vsubw_u8(vdupq_n_u16(256), vmask); |
78 | | } |
79 | | uint8x8x4_t vdev = vld4_u8((uint8_t*)device); |
80 | | |
81 | | vdev.val[NEON_A] = SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256) |
82 | | + SkAlphaMul_neon8(vdev.val[NEON_A], vscale); |
83 | | vdev.val[NEON_R] = SkAlphaMul_neon8(vpmc.val[NEON_R], vmask256) |
84 | | + SkAlphaMul_neon8(vdev.val[NEON_R], vscale); |
85 | | vdev.val[NEON_G] = SkAlphaMul_neon8(vpmc.val[NEON_G], vmask256) |
86 | | + SkAlphaMul_neon8(vdev.val[NEON_G], vscale); |
87 | | vdev.val[NEON_B] = SkAlphaMul_neon8(vpmc.val[NEON_B], vmask256) |
88 | | + SkAlphaMul_neon8(vdev.val[NEON_B], vscale); |
89 | | |
90 | | vst4_u8((uint8_t*)device, vdev); |
91 | | |
92 | | mask += 8; |
93 | | device += 8; |
94 | | w -= 8; |
95 | | } |
96 | | |
97 | | while (w--) { |
98 | | unsigned aa = *mask++; |
99 | | if (isColor) { |
100 | | *device = SkBlendARGB32(pmc, *device, aa); |
101 | | } else { |
102 | | *device = SkAlphaMulQ(pmc, SkAlpha255To256(aa)) |
103 | | + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa)); |
104 | | } |
105 | | device += 1; |
106 | | } |
107 | | |
108 | | device = (uint32_t*)((char*)device + dstRB); |
109 | | mask += mask_adjust; |
110 | | |
111 | | } while (--height != 0); |
112 | | } |
113 | | |
114 | | static void blit_mask_d32_a8_general(SkPMColor* dst, size_t dstRB, |
115 | | const SkAlpha* mask, size_t maskRB, |
116 | | SkColor color, int w, int h) { |
117 | | D32_A8_Opaque_Color_neon<true>(dst, dstRB, mask, maskRB, color, w, h); |
118 | | } |
119 | | |
120 | | // As above, but made slightly simpler by requiring that color is opaque. |
121 | | static void blit_mask_d32_a8_opaque(SkPMColor* dst, size_t dstRB, |
122 | | const SkAlpha* mask, size_t maskRB, |
123 | | SkColor color, int w, int h) { |
124 | | D32_A8_Opaque_Color_neon<false>(dst, dstRB, mask, maskRB, color, w, h); |
125 | | } |
126 | | |
127 | | // Same as _opaque, but assumes color == SK_ColorBLACK, a very common and even simpler case. |
128 | | static void blit_mask_d32_a8_black(SkPMColor* dst, size_t dstRB, |
129 | | const SkAlpha* maskPtr, size_t maskRB, |
130 | | int width, int height) { |
131 | | SkPMColor* SK_RESTRICT device = (SkPMColor*)dst; |
132 | | const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr; |
133 | | |
134 | | // Nine patch may set maskRB to 0 to blit the same row repeatedly. |
135 | | ptrdiff_t mask_adjust = (ptrdiff_t)maskRB - width; |
136 | | dstRB -= (width << 2); |
137 | | do { |
138 | | int w = width; |
139 | | while (w >= 8) { |
140 | | uint8x8_t vmask = vld1_u8(mask); |
141 | | uint16x8_t vscale = vsubw_u8(vdupq_n_u16(256), vmask); |
142 | | uint8x8x4_t vdevice = vld4_u8((uint8_t*)device); |
143 | | |
144 | | vdevice = SkAlphaMulQ_neon8(vdevice, vscale); |
145 | | vdevice.val[NEON_A] += vmask; |
146 | | |
147 | | vst4_u8((uint8_t*)device, vdevice); |
148 | | |
149 | | mask += 8; |
150 | | device += 8; |
151 | | w -= 8; |
152 | | } |
153 | | while (w-- > 0) { |
154 | | unsigned aa = *mask++; |
155 | | *device = (aa << SK_A32_SHIFT) |
156 | | + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa)); |
157 | | device += 1; |
158 | | } |
159 | | device = (uint32_t*)((char*)device + dstRB); |
160 | | mask += mask_adjust; |
161 | | } while (--height != 0); |
162 | | } |
163 | | |
164 | | #elif SK_CPU_LSX_LEVEL >= SK_CPU_LSX_LEVEL_LSX |
165 | | #include <lsxintrin.h> |
166 | | |
167 | | static __m128i SkAlphaMul_lsx(__m128i x, __m128i y) { |
168 | | __m128i tmp = __lsx_vmul_h(x, y); |
169 | | __m128i mask = __lsx_vreplgr2vr_h(0xff00); |
170 | | return __lsx_vsrlri_h(__lsx_vand_v(tmp, mask), 8); |
171 | | } |
172 | | |
173 | | template <bool isColor> |
174 | | static void D32_A8_Opaque_Color_lsx(void* SK_RESTRICT dst, size_t dstRB, |
175 | | const void* SK_RESTRICT maskPtr, size_t maskRB, |
176 | | SkColor color, int width, int height) { |
177 | | SkPMColor pmc = SkPreMultiplyColor(color); |
178 | | SkPMColor* SK_RESTRICT device = (SkPMColor*)dst; |
179 | | const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr; |
180 | | __m128i vpmc_b = __lsx_vldi(0); |
181 | | __m128i vpmc_g = __lsx_vldi(0); |
182 | | __m128i vpmc_r = __lsx_vldi(0); |
183 | | __m128i vpmc_a = __lsx_vldi(0); |
184 | | |
185 | | // Nine patch may set maskRB to 0 to blit the same row repeatedly. |
186 | | ptrdiff_t mask_adjust = (ptrdiff_t)maskRB - width; |
187 | | dstRB -= (width << 2); |
188 | | |
189 | | if (width >= 8) { |
190 | | vpmc_b = __lsx_vreplgr2vr_h(SkGetPackedB32(pmc)); |
191 | | vpmc_g = __lsx_vreplgr2vr_h(SkGetPackedG32(pmc)); |
192 | | vpmc_r = __lsx_vreplgr2vr_h(SkGetPackedR32(pmc)); |
193 | | vpmc_a = __lsx_vreplgr2vr_h(SkGetPackedA32(pmc)); |
194 | | } |
195 | | |
196 | | const __m128i zeros = __lsx_vldi(0); |
197 | | __m128i planar = __lsx_vldi(0); |
198 | | planar = __lsx_vinsgr2vr_d(planar, 0x0d0905010c080400, 0); |
199 | | planar = __lsx_vinsgr2vr_d(planar, 0x0f0b07030e0a0602, 1); |
200 | | |
201 | | do{ |
202 | | int w = width; |
203 | | while(w >= 8){ |
204 | | __m128i lo = __lsx_vld(device, 0); // bgra bgra bgra bgra |
205 | | __m128i hi = __lsx_vld(device, 16); // BGRA BGRA BGRA BGRA |
206 | | lo = __lsx_vshuf_b(zeros, lo, planar); // bbbb gggg rrrr aaaa |
207 | | hi = __lsx_vshuf_b(zeros, hi, planar); // BBBB GGGG RRRR AAAA |
208 | | __m128i bg = __lsx_vilvl_w(hi, lo), // bbbb BBBB gggg GGGG |
209 | | ra = __lsx_vilvh_w(hi, lo); // rrrr RRRR aaaa AAAA |
210 | | |
211 | | __m128i b = __lsx_vilvl_b(zeros, bg), // _b_b _b_b _B_B _B_B |
212 | | g = __lsx_vilvh_b(zeros, bg), // _g_g _g_g _G_G _G_G |
213 | | r = __lsx_vilvl_b(zeros, ra), // _r_r _r_r _R_R _R_R |
214 | | a = __lsx_vilvh_b(zeros, ra); // _a_a _a_a _A_A _A_A |
215 | | |
216 | | __m128i vmask = __lsx_vld(mask, 0); |
217 | | vmask = __lsx_vilvl_b(zeros, vmask); |
218 | | __m128i vscale, vmask256 = __lsx_vadd_h(vmask, __lsx_vreplgr2vr_h(1)); |
219 | | |
220 | | if (isColor) { |
221 | | __m128i tmp = SkAlphaMul_lsx(vpmc_a, vmask256); |
222 | | vscale = __lsx_vsub_h(__lsx_vreplgr2vr_h(256), tmp); |
223 | | } else { |
224 | | vscale = __lsx_vsub_h(__lsx_vreplgr2vr_h(256), vmask); |
225 | | } |
226 | | |
227 | | b = SkAlphaMul_lsx(vpmc_b, vmask256) + SkAlphaMul_lsx(b, vscale); |
228 | | g = SkAlphaMul_lsx(vpmc_g, vmask256) + SkAlphaMul_lsx(g, vscale); |
229 | | r = SkAlphaMul_lsx(vpmc_r, vmask256) + SkAlphaMul_lsx(r, vscale); |
230 | | a = SkAlphaMul_lsx(vpmc_a, vmask256) + SkAlphaMul_lsx(a, vscale); |
231 | | |
232 | | bg = __lsx_vor_v(b, __lsx_vslli_h(g, 8)); // bgbg bgbg BGBG BGBG |
233 | | ra = __lsx_vor_v(r, __lsx_vslli_h(a, 8)); // rara rara RARA RARA |
234 | | lo = __lsx_vilvl_h(ra, bg); // bgra bgra bgra bgra |
235 | | hi = __lsx_vilvh_h(ra, bg); // BGRA BGRA BGRA BGRA |
236 | | |
237 | | __lsx_vst(lo, device, 0); |
238 | | __lsx_vst(hi, device, 16); |
239 | | |
240 | | mask += 8; |
241 | | device += 8; |
242 | | w -= 8; |
243 | | } |
244 | | |
245 | | while (w--) { |
246 | | unsigned aa = *mask++; |
247 | | if (isColor) { |
248 | | *device = SkBlendARGB32(pmc, *device, aa); |
249 | | } else { |
250 | | *device = SkAlphaMulQ(pmc, SkAlpha255To256(aa)) |
251 | | + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa)); |
252 | | } |
253 | | device += 1; |
254 | | } |
255 | | |
256 | | device = (uint32_t *)((char*)device + dstRB); |
257 | | mask += mask_adjust; |
258 | | |
259 | | } while (--height != 0); |
260 | | } |
261 | | |
262 | | static void blit_mask_d32_a8_general(SkPMColor* dst, size_t dstRB, |
263 | | const SkAlpha* mask, size_t maskRB, |
264 | | SkColor color, int w, int h) { |
265 | | D32_A8_Opaque_Color_lsx<true>(dst, dstRB, mask, maskRB, color, w, h); |
266 | | } |
267 | | |
268 | | static void blit_mask_d32_a8_opaque(SkPMColor* dst, size_t dstRB, |
269 | | const SkAlpha* mask, size_t maskRB, |
270 | | SkColor color, int w, int h) { |
271 | | D32_A8_Opaque_Color_lsx<false>(dst, dstRB, mask, maskRB, color, w, h); |
272 | | } |
273 | | |
274 | | // Same as _opaque, but assumes color == SK_ColorBLACK, a very common and even simpler case. |
275 | | static void blit_mask_d32_a8_black(SkPMColor* dst, size_t dstRB, |
276 | | const SkAlpha* maskPtr, size_t maskRB, |
277 | | int width, int height) { |
278 | | SkPMColor* SK_RESTRICT device = (SkPMColor*)dst; |
279 | | const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr; |
280 | | |
281 | | // Nine patch may set maskRB to 0 to blit the same row repeatedly. |
282 | | ptrdiff_t mask_adjust = (ptrdiff_t)maskRB - width; |
283 | | dstRB -= (width << 2); |
284 | | const __m128i zeros = __lsx_vldi(0); |
285 | | __m128i planar = __lsx_vldi(0); |
286 | | planar = __lsx_vinsgr2vr_d(planar, 0x0d0905010c080400, 0); |
287 | | planar = __lsx_vinsgr2vr_d(planar, 0x0f0b07030e0a0602, 1); |
288 | | |
289 | | do { |
290 | | int w = width; |
291 | | while (w >= 8) { |
292 | | __m128i vmask = __lsx_vld(mask, 0); |
293 | | vmask = __lsx_vilvl_b(zeros, vmask); |
294 | | __m128i vscale = __lsx_vsub_h(__lsx_vreplgr2vr_h(256), vmask); |
295 | | __m128i lo = __lsx_vld(device, 0); // bgra bgra bgra bgra |
296 | | __m128i hi = __lsx_vld(device, 16); // BGRA BGRA BGRA BGRA |
297 | | lo = __lsx_vshuf_b(zeros, lo, planar); // bbbb gggg rrrr aaaa |
298 | | hi = __lsx_vshuf_b(zeros, hi, planar); // BBBB GGGG RRRR AAAA |
299 | | __m128i bg = __lsx_vilvl_w(hi, lo), // bbbb BBBB gggg GGGG |
300 | | ra = __lsx_vilvh_w(hi, lo); // rrrr RRRR aaaa AAAA |
301 | | |
302 | | __m128i b = __lsx_vilvl_b(zeros, bg), // _b_b _b_b _B_B _B_B |
303 | | g = __lsx_vilvh_b(zeros, bg), // _g_g _g_g _G_G _G_G |
304 | | r = __lsx_vilvl_b(zeros, ra), // _r_r _r_r _R_R _R_R |
305 | | a = __lsx_vilvh_b(zeros, ra); // _a_a _a_a _A_A _A_A |
306 | | |
307 | | b = SkAlphaMul_lsx(b, vscale); |
308 | | g = SkAlphaMul_lsx(g, vscale); |
309 | | r = SkAlphaMul_lsx(r, vscale); |
310 | | a = SkAlphaMul_lsx(a, vscale); |
311 | | |
312 | | a += vmask; |
313 | | |
314 | | bg = __lsx_vor_v(b, __lsx_vslli_h(g, 8)); // bgbg bgbg BGBG BGBG |
315 | | ra = __lsx_vor_v(r, __lsx_vslli_h(a, 8)); // rara rara RARA RARA |
316 | | lo = __lsx_vilvl_h(ra, bg); // bgra bgra bgra bgra |
317 | | hi = __lsx_vilvh_h(ra, bg); // BGRA BGRA BGRA BGRA |
318 | | |
319 | | __lsx_vst(lo, device, 0); |
320 | | __lsx_vst(hi, device, 16); |
321 | | |
322 | | mask += 8; |
323 | | device += 8; |
324 | | w -= 8; |
325 | | } |
326 | | |
327 | | while (w-- > 0) { |
328 | | unsigned aa = *mask++; |
329 | | *device = (aa << SK_A32_SHIFT) |
330 | | + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa)); |
331 | | device += 1; |
332 | | } |
333 | | |
334 | | device = (uint32_t*)((char*)device + dstRB); |
335 | | mask += mask_adjust; |
336 | | |
337 | | } while (--height != 0); |
338 | | } |
339 | | |
340 | | #else |
341 | | static void blit_mask_d32_a8_general(SkPMColor* dst, size_t dstRB, |
342 | | const SkAlpha* mask, size_t maskRB, |
343 | 2.32k | SkColor color, int w, int h) { |
344 | 2.32k | auto s = Sk4px::DupPMColor(SkPreMultiplyColor(color)); |
345 | 397k | auto fn = [&](const Sk4px& d, const Sk4px& aa) { |
346 | | // = (s + d(1-sa))aa + d(1-aa) |
347 | | // = s*aa + d(1-sa*aa) |
348 | 397k | auto left = s.approxMulDiv255(aa), |
349 | 397k | right = d.approxMulDiv255(left.alphas().inv()); |
350 | 397k | return left + right; // This does not overflow (exhaustively checked). |
351 | 397k | }; SkBlitMask_opts.cpp:sse2::blit_mask_d32_a8_general(unsigned int*, unsigned long, unsigned char const*, unsigned long, unsigned int, int, int)::$_0::operator()(Sk4px const&, Sk4px const&) const Line | Count | Source | 345 | 397k | auto fn = [&](const Sk4px& d, const Sk4px& aa) { | 346 | | // = (s + d(1-sa))aa + d(1-aa) | 347 | | // = s*aa + d(1-sa*aa) | 348 | 397k | auto left = s.approxMulDiv255(aa), | 349 | 397k | right = d.approxMulDiv255(left.alphas().inv()); | 350 | 397k | return left + right; // This does not overflow (exhaustively checked). | 351 | 397k | }; |
Unexecuted instantiation: SkBlitMask_opts_ssse3.cpp:ssse3::blit_mask_d32_a8_general(unsigned int*, unsigned long, unsigned char const*, unsigned long, unsigned int, int, int)::$_0::operator()(Sk4px const&, Sk4px const&) const |
352 | 31.4k | while (h --> 0) { |
353 | 29.1k | Sk4px::MapDstAlpha(w, dst, mask, fn); |
354 | 29.1k | dst += dstRB / sizeof(*dst); |
355 | 29.1k | mask += maskRB / sizeof(*mask); |
356 | 29.1k | } |
357 | 2.32k | } SkBlitMask_opts.cpp:sse2::blit_mask_d32_a8_general(unsigned int*, unsigned long, unsigned char const*, unsigned long, unsigned int, int, int) Line | Count | Source | 343 | 2.32k | SkColor color, int w, int h) { | 344 | 2.32k | auto s = Sk4px::DupPMColor(SkPreMultiplyColor(color)); | 345 | 2.32k | auto fn = [&](const Sk4px& d, const Sk4px& aa) { | 346 | | // = (s + d(1-sa))aa + d(1-aa) | 347 | | // = s*aa + d(1-sa*aa) | 348 | 2.32k | auto left = s.approxMulDiv255(aa), | 349 | 2.32k | right = d.approxMulDiv255(left.alphas().inv()); | 350 | 2.32k | return left + right; // This does not overflow (exhaustively checked). | 351 | 2.32k | }; | 352 | 31.4k | while (h --> 0) { | 353 | 29.1k | Sk4px::MapDstAlpha(w, dst, mask, fn); | 354 | 29.1k | dst += dstRB / sizeof(*dst); | 355 | 29.1k | mask += maskRB / sizeof(*mask); | 356 | 29.1k | } | 357 | 2.32k | } |
Unexecuted instantiation: SkBlitMask_opts_ssse3.cpp:ssse3::blit_mask_d32_a8_general(unsigned int*, unsigned long, unsigned char const*, unsigned long, unsigned int, int, int) |
358 | | |
359 | | // As above, but made slightly simpler by requiring that color is opaque. |
360 | | static void blit_mask_d32_a8_opaque(SkPMColor* dst, size_t dstRB, |
361 | | const SkAlpha* mask, size_t maskRB, |
362 | 2.14k | SkColor color, int w, int h) { |
363 | 2.14k | SkASSERT(SkColorGetA(color) == 0xFF); |
364 | 2.14k | auto s = Sk4px::DupPMColor(SkPreMultiplyColor(color)); |
365 | 1.13M | auto fn = [&](const Sk4px& d, const Sk4px& aa) { |
366 | | // = (s + d(1-sa))aa + d(1-aa) |
367 | | // = s*aa + d(1-sa*aa) |
368 | | // ~~~> |
369 | | // = s*aa + d(1-aa) |
370 | 1.13M | return s.approxMulDiv255(aa) + d.approxMulDiv255(aa.inv()); |
371 | 1.13M | }; SkBlitMask_opts.cpp:sse2::blit_mask_d32_a8_opaque(unsigned int*, unsigned long, unsigned char const*, unsigned long, unsigned int, int, int)::$_0::operator()(Sk4px const&, Sk4px const&) const Line | Count | Source | 365 | 1.13M | auto fn = [&](const Sk4px& d, const Sk4px& aa) { | 366 | | // = (s + d(1-sa))aa + d(1-aa) | 367 | | // = s*aa + d(1-sa*aa) | 368 | | // ~~~> | 369 | | // = s*aa + d(1-aa) | 370 | 1.13M | return s.approxMulDiv255(aa) + d.approxMulDiv255(aa.inv()); | 371 | 1.13M | }; |
Unexecuted instantiation: SkBlitMask_opts_ssse3.cpp:ssse3::blit_mask_d32_a8_opaque(unsigned int*, unsigned long, unsigned char const*, unsigned long, unsigned int, int, int)::$_0::operator()(Sk4px const&, Sk4px const&) const |
372 | 57.4k | while (h --> 0) { |
373 | 55.2k | Sk4px::MapDstAlpha(w, dst, mask, fn); |
374 | 55.2k | dst += dstRB / sizeof(*dst); |
375 | 55.2k | mask += maskRB / sizeof(*mask); |
376 | 55.2k | } |
377 | 2.14k | } SkBlitMask_opts.cpp:sse2::blit_mask_d32_a8_opaque(unsigned int*, unsigned long, unsigned char const*, unsigned long, unsigned int, int, int) Line | Count | Source | 362 | 2.14k | SkColor color, int w, int h) { | 363 | 2.14k | SkASSERT(SkColorGetA(color) == 0xFF); | 364 | 2.14k | auto s = Sk4px::DupPMColor(SkPreMultiplyColor(color)); | 365 | 2.14k | auto fn = [&](const Sk4px& d, const Sk4px& aa) { | 366 | | // = (s + d(1-sa))aa + d(1-aa) | 367 | | // = s*aa + d(1-sa*aa) | 368 | | // ~~~> | 369 | | // = s*aa + d(1-aa) | 370 | 2.14k | return s.approxMulDiv255(aa) + d.approxMulDiv255(aa.inv()); | 371 | 2.14k | }; | 372 | 57.4k | while (h --> 0) { | 373 | 55.2k | Sk4px::MapDstAlpha(w, dst, mask, fn); | 374 | 55.2k | dst += dstRB / sizeof(*dst); | 375 | 55.2k | mask += maskRB / sizeof(*mask); | 376 | 55.2k | } | 377 | 2.14k | } |
Unexecuted instantiation: SkBlitMask_opts_ssse3.cpp:ssse3::blit_mask_d32_a8_opaque(unsigned int*, unsigned long, unsigned char const*, unsigned long, unsigned int, int, int) Unexecuted instantiation: SkBlitMask_opts_ssse3.cpp:ssse3::blit_mask_d32_a8_opaque(unsigned int*, unsigned long, unsigned char const*, unsigned long, unsigned int, int, int) |
378 | | |
379 | | // Same as _opaque, but assumes color == SK_ColorBLACK, a very common and even simpler case. |
380 | | static void blit_mask_d32_a8_black(SkPMColor* dst, size_t dstRB, |
381 | | const SkAlpha* mask, size_t maskRB, |
382 | 11.3k | int w, int h) { |
383 | 6.63M | auto fn = [](const Sk4px& d, const Sk4px& aa) { |
384 | | // = (s + d(1-sa))aa + d(1-aa) |
385 | | // = s*aa + d(1-sa*aa) |
386 | | // ~~~> |
387 | | // a = 1*aa + d(1-1*aa) = aa + d(1-aa) |
388 | | // c = 0*aa + d(1-1*aa) = d(1-aa) |
389 | 6.63M | return (aa & Sk4px(skvx::byte16{0,0,0,255, 0,0,0,255, 0,0,0,255, 0,0,0,255})) |
390 | 6.63M | + d.approxMulDiv255(aa.inv()); |
391 | 6.63M | }; SkBlitMask_opts.cpp:sse2::blit_mask_d32_a8_black(unsigned int*, unsigned long, unsigned char const*, unsigned long, int, int)::$_0::operator()(Sk4px const&, Sk4px const&) const Line | Count | Source | 383 | 6.63M | auto fn = [](const Sk4px& d, const Sk4px& aa) { | 384 | | // = (s + d(1-sa))aa + d(1-aa) | 385 | | // = s*aa + d(1-sa*aa) | 386 | | // ~~~> | 387 | | // a = 1*aa + d(1-1*aa) = aa + d(1-aa) | 388 | | // c = 0*aa + d(1-1*aa) = d(1-aa) | 389 | 6.63M | return (aa & Sk4px(skvx::byte16{0,0,0,255, 0,0,0,255, 0,0,0,255, 0,0,0,255})) | 390 | 6.63M | + d.approxMulDiv255(aa.inv()); | 391 | 6.63M | }; |
Unexecuted instantiation: SkBlitMask_opts_ssse3.cpp:ssse3::blit_mask_d32_a8_black(unsigned int*, unsigned long, unsigned char const*, unsigned long, int, int)::$_0::operator()(Sk4px const&, Sk4px const&) const |
392 | 313k | while (h --> 0) { |
393 | 302k | Sk4px::MapDstAlpha(w, dst, mask, fn); |
394 | 302k | dst += dstRB / sizeof(*dst); |
395 | 302k | mask += maskRB / sizeof(*mask); |
396 | 302k | } |
397 | 11.3k | } SkBlitMask_opts.cpp:sse2::blit_mask_d32_a8_black(unsigned int*, unsigned long, unsigned char const*, unsigned long, int, int) Line | Count | Source | 382 | 11.3k | int w, int h) { | 383 | 11.3k | auto fn = [](const Sk4px& d, const Sk4px& aa) { | 384 | | // = (s + d(1-sa))aa + d(1-aa) | 385 | | // = s*aa + d(1-sa*aa) | 386 | | // ~~~> | 387 | | // a = 1*aa + d(1-1*aa) = aa + d(1-aa) | 388 | | // c = 0*aa + d(1-1*aa) = d(1-aa) | 389 | 11.3k | return (aa & Sk4px(skvx::byte16{0,0,0,255, 0,0,0,255, 0,0,0,255, 0,0,0,255})) | 390 | 11.3k | + d.approxMulDiv255(aa.inv()); | 391 | 11.3k | }; | 392 | 313k | while (h --> 0) { | 393 | 302k | Sk4px::MapDstAlpha(w, dst, mask, fn); | 394 | 302k | dst += dstRB / sizeof(*dst); | 395 | 302k | mask += maskRB / sizeof(*mask); | 396 | 302k | } | 397 | 11.3k | } |
Unexecuted instantiation: SkBlitMask_opts_ssse3.cpp:ssse3::blit_mask_d32_a8_black(unsigned int*, unsigned long, unsigned char const*, unsigned long, int, int) |
398 | | #endif |
399 | | |
400 | | /*not static*/ inline void blit_mask_d32_a8(SkPMColor* dst, size_t dstRB, |
401 | | const SkAlpha* mask, size_t maskRB, |
402 | 15.8k | SkColor color, int w, int h) { |
403 | 15.8k | if (color == SK_ColorBLACK) { |
404 | 11.3k | blit_mask_d32_a8_black(dst, dstRB, mask, maskRB, w, h); |
405 | 11.3k | } else if (SkColorGetA(color) == 0xFF) { |
406 | 2.14k | blit_mask_d32_a8_opaque(dst, dstRB, mask, maskRB, color, w, h); |
407 | 2.32k | } else { |
408 | 2.32k | blit_mask_d32_a8_general(dst, dstRB, mask, maskRB, color, w, h); |
409 | 2.32k | } |
410 | 15.8k | } sse2::blit_mask_d32_a8(unsigned int*, unsigned long, unsigned char const*, unsigned long, unsigned int, int, int) Line | Count | Source | 402 | 15.8k | SkColor color, int w, int h) { | 403 | 15.8k | if (color == SK_ColorBLACK) { | 404 | 11.3k | blit_mask_d32_a8_black(dst, dstRB, mask, maskRB, w, h); | 405 | 11.3k | } else if (SkColorGetA(color) == 0xFF) { | 406 | 2.14k | blit_mask_d32_a8_opaque(dst, dstRB, mask, maskRB, color, w, h); | 407 | 2.32k | } else { | 408 | 2.32k | blit_mask_d32_a8_general(dst, dstRB, mask, maskRB, color, w, h); | 409 | 2.32k | } | 410 | 15.8k | } |
Unexecuted instantiation: ssse3::blit_mask_d32_a8(unsigned int*, unsigned long, unsigned char const*, unsigned long, unsigned int, int, int) |
411 | | |
412 | | } // namespace SK_OPTS_NS |
413 | | |
414 | | #endif//SkBlitMask_opts_DEFINED |