Coverage Report

Created: 2024-05-20 07:14

/src/skia/src/opts/SkBlitMask_opts.h
Line
Count
Source
1
/*
2
 * Copyright 2015 Google Inc.
3
 *
4
 * Use of this source code is governed by a BSD-style license that can be
5
 * found in the LICENSE file.
6
 */
7
8
#ifndef SkBlitMask_opts_DEFINED
9
#define SkBlitMask_opts_DEFINED
10
11
#include "include/private/base/SkFeatures.h"
12
#include "src/core/Sk4px.h"
13
14
#if defined(SK_ARM_HAS_NEON)
15
    #include <arm_neon.h>
16
#endif
17
18
namespace SK_OPTS_NS {
19
20
#if defined(SK_ARM_HAS_NEON)
21
    // The Sk4px versions below will work fine with NEON, but we have had many indications
22
    // that it doesn't perform as well as this NEON-specific code.  TODO(mtklein): why?
23
24
    // Index of each channel inside the uint8x8x4_t planes used below (vpmc.val[], vdev.val[]):
    // the channel's SK_*32_SHIFT value divided by 8.
    #define NEON_A (SK_A32_SHIFT / 8)
25
    #define NEON_R (SK_R32_SHIFT / 8)
26
    #define NEON_G (SK_G32_SHIFT / 8)
27
    #define NEON_B (SK_B32_SHIFT / 8)
28
29
    // Widens eight 8-bit alphas to 16 bits and adds 1 to every lane (0..255 becomes 1..256).
    static inline uint16x8_t SkAlpha255To256_neon8(uint8x8_t alpha) {
        const uint16x8_t ones = vdupq_n_u16(1);
        return vaddw_u8(ones, alpha);
    }
32
33
    // Per lane: (color * scale) >> 8.  Each 8-bit color is widened to 16 bits, multiplied by
    // the matching 16-bit scale, and the product is shifted right by 8 and narrowed to 8 bits.
    static inline uint8x8_t SkAlphaMul_neon8(uint8x8_t color, uint16x8_t scale) {
        const uint16x8_t wide    = vmovl_u8(color);
        const uint16x8_t product = vmulq_u16(wide, scale);
        return vshrn_n_u16(product, 8);
    }
36
37
    // Applies SkAlphaMul_neon8 with the same scale to each of the four channel planes.
    static inline uint8x8x4_t SkAlphaMulQ_neon8(uint8x8x4_t color, uint16x8_t scale) {
        uint8x8x4_t scaled;
        for (int channel = 0; channel < 4; ++channel) {
            scaled.val[channel] = SkAlphaMul_neon8(color.val[channel], scale);
        }
        return scaled;
    }
47
48
49
    template <bool isColor>
50
    static void D32_A8_Opaque_Color_neon(void* SK_RESTRICT dst, size_t dstRB,
51
                                         const void* SK_RESTRICT maskPtr, size_t maskRB,
52
                                         SkColor color, int width, int height) {
53
        SkPMColor pmc = SkPreMultiplyColor(color);
54
        SkPMColor* SK_RESTRICT device = (SkPMColor*)dst;
55
        const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr;
56
        uint8x8x4_t vpmc;
57
58
        // Nine patch may set maskRB to 0 to blit the same row repeatedly.
59
        ptrdiff_t mask_adjust = (ptrdiff_t)maskRB - width;
60
        dstRB -= (width << 2);
61
62
        if (width >= 8) {
63
            vpmc.val[NEON_A] = vdup_n_u8(SkGetPackedA32(pmc));
64
            vpmc.val[NEON_R] = vdup_n_u8(SkGetPackedR32(pmc));
65
            vpmc.val[NEON_G] = vdup_n_u8(SkGetPackedG32(pmc));
66
            vpmc.val[NEON_B] = vdup_n_u8(SkGetPackedB32(pmc));
67
        }
68
        do {
69
            int w = width;
70
            while (w >= 8) {
71
                uint8x8_t vmask = vld1_u8(mask);
72
                uint16x8_t vscale, vmask256 = SkAlpha255To256_neon8(vmask);
73
                if (isColor) {
74
                    vscale = vsubw_u8(vdupq_n_u16(256),
75
                            SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256));
76
                } else {
77
                    vscale = vsubw_u8(vdupq_n_u16(256), vmask);
78
                }
79
                uint8x8x4_t vdev = vld4_u8((uint8_t*)device);
80
81
                vdev.val[NEON_A] =   SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256)
82
                    + SkAlphaMul_neon8(vdev.val[NEON_A], vscale);
83
                vdev.val[NEON_R] =   SkAlphaMul_neon8(vpmc.val[NEON_R], vmask256)
84
                    + SkAlphaMul_neon8(vdev.val[NEON_R], vscale);
85
                vdev.val[NEON_G] =   SkAlphaMul_neon8(vpmc.val[NEON_G], vmask256)
86
                    + SkAlphaMul_neon8(vdev.val[NEON_G], vscale);
87
                vdev.val[NEON_B] =   SkAlphaMul_neon8(vpmc.val[NEON_B], vmask256)
88
                    + SkAlphaMul_neon8(vdev.val[NEON_B], vscale);
89
90
                vst4_u8((uint8_t*)device, vdev);
91
92
                mask += 8;
93
                device += 8;
94
                w -= 8;
95
            }
96
97
            while (w--) {
98
                unsigned aa = *mask++;
99
                if (isColor) {
100
                    *device = SkBlendARGB32(pmc, *device, aa);
101
                } else {
102
                    *device = SkAlphaMulQ(pmc, SkAlpha255To256(aa))
103
                        + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa));
104
                }
105
                device += 1;
106
            }
107
108
            device = (uint32_t*)((char*)device + dstRB);
109
            mask += mask_adjust;
110
111
        } while (--height != 0);
112
    }
113
114
    static void blit_mask_d32_a8_general(SkPMColor* dst, size_t dstRB,
115
                                         const SkAlpha* mask, size_t maskRB,
116
                                         SkColor color, int w, int h) {
117
        D32_A8_Opaque_Color_neon<true>(dst, dstRB, mask, maskRB, color, w, h);
118
    }
119
120
    // As above, but made slightly simpler by requiring that color is opaque.
121
    static void blit_mask_d32_a8_opaque(SkPMColor* dst, size_t dstRB,
122
                                        const SkAlpha* mask, size_t maskRB,
123
                                        SkColor color, int w, int h) {
124
        D32_A8_Opaque_Color_neon<false>(dst, dstRB, mask, maskRB, color, w, h);
125
    }
126
127
    // Same as _opaque, but assumes color == SK_ColorBLACK, a very common and even simpler case.
128
    static void blit_mask_d32_a8_black(SkPMColor* dst, size_t dstRB,
129
                                       const SkAlpha* maskPtr, size_t maskRB,
130
                                       int width, int height) {
131
        SkPMColor* SK_RESTRICT device = (SkPMColor*)dst;
132
        const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr;
133
134
        // Nine patch may set maskRB to 0 to blit the same row repeatedly.
135
        ptrdiff_t mask_adjust = (ptrdiff_t)maskRB - width;
136
        dstRB -= (width << 2);
137
        do {
138
            int w = width;
139
            while (w >= 8) {
140
                uint8x8_t vmask = vld1_u8(mask);
141
                uint16x8_t vscale = vsubw_u8(vdupq_n_u16(256), vmask);
142
                uint8x8x4_t vdevice = vld4_u8((uint8_t*)device);
143
144
                vdevice = SkAlphaMulQ_neon8(vdevice, vscale);
145
                vdevice.val[NEON_A] += vmask;
146
147
                vst4_u8((uint8_t*)device, vdevice);
148
149
                mask += 8;
150
                device += 8;
151
                w -= 8;
152
            }
153
            while (w-- > 0) {
154
                unsigned aa = *mask++;
155
                *device = (aa << SK_A32_SHIFT)
156
                            + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa));
157
                device += 1;
158
            }
159
            device = (uint32_t*)((char*)device + dstRB);
160
            mask += mask_adjust;
161
        } while (--height != 0);
162
    }
163
164
#elif SK_CPU_LSX_LEVEL >= SK_CPU_LSX_LEVEL_LSX
165
    #include <lsxintrin.h>
166
167
    // Per 16-bit lane: the high byte of the low 16 bits of x * y, i.e. ((x * y) & 0xff00) >> 8.
    static __m128i SkAlphaMul_lsx(__m128i x, __m128i y) {
        const __m128i highByte = __lsx_vreplgr2vr_h(0xff00);
        const __m128i product  = __lsx_vmul_h(x, y);
        // The low byte is cleared before the rounding shift, so the rounding bit is always 0.
        return __lsx_vsrlri_h(__lsx_vand_v(product, highByte), 8);
    }
172
173
    template <bool isColor>
174
    static void D32_A8_Opaque_Color_lsx(void* SK_RESTRICT dst, size_t dstRB,
175
                                         const void* SK_RESTRICT maskPtr, size_t maskRB,
176
                                         SkColor color, int width, int height) {
177
        SkPMColor pmc = SkPreMultiplyColor(color);
178
        SkPMColor* SK_RESTRICT device = (SkPMColor*)dst;
179
        const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr;
180
        __m128i vpmc_b = __lsx_vldi(0);
181
        __m128i vpmc_g = __lsx_vldi(0);
182
        __m128i vpmc_r = __lsx_vldi(0);
183
        __m128i vpmc_a = __lsx_vldi(0);
184
185
        // Nine patch may set maskRB to 0 to blit the same row repeatedly.
186
        ptrdiff_t mask_adjust = (ptrdiff_t)maskRB - width;
187
        dstRB -= (width << 2);
188
189
        if (width >= 8) {
190
            vpmc_b = __lsx_vreplgr2vr_h(SkGetPackedB32(pmc));
191
            vpmc_g = __lsx_vreplgr2vr_h(SkGetPackedG32(pmc));
192
            vpmc_r = __lsx_vreplgr2vr_h(SkGetPackedR32(pmc));
193
            vpmc_a = __lsx_vreplgr2vr_h(SkGetPackedA32(pmc));
194
        }
195
196
        const __m128i zeros = __lsx_vldi(0);
197
        __m128i planar = __lsx_vldi(0);
198
        planar = __lsx_vinsgr2vr_d(planar, 0x0d0905010c080400, 0);
199
        planar = __lsx_vinsgr2vr_d(planar, 0x0f0b07030e0a0602, 1);
200
201
        do{
202
            int w = width;
203
            while(w >= 8){
204
                __m128i lo = __lsx_vld(device, 0);         // bgra bgra bgra bgra
205
                __m128i hi = __lsx_vld(device, 16);        // BGRA BGRA BGRA BGRA
206
                lo = __lsx_vshuf_b(zeros, lo, planar);     // bbbb gggg rrrr aaaa
207
                hi = __lsx_vshuf_b(zeros, hi, planar);     // BBBB GGGG RRRR AAAA
208
                __m128i bg = __lsx_vilvl_w(hi, lo),        // bbbb BBBB gggg GGGG
209
                        ra = __lsx_vilvh_w(hi, lo);        // rrrr RRRR aaaa AAAA
210
211
                __m128i b = __lsx_vilvl_b(zeros, bg),      // _b_b _b_b _B_B _B_B
212
                        g = __lsx_vilvh_b(zeros, bg),      // _g_g _g_g _G_G _G_G
213
                        r = __lsx_vilvl_b(zeros, ra),      // _r_r _r_r _R_R _R_R
214
                        a = __lsx_vilvh_b(zeros, ra);      // _a_a _a_a _A_A _A_A
215
216
                __m128i vmask = __lsx_vld(mask, 0);
217
                vmask = __lsx_vilvl_b(zeros, vmask);
218
                __m128i vscale, vmask256 = __lsx_vadd_h(vmask, __lsx_vreplgr2vr_h(1));
219
220
                if (isColor) {
221
                    __m128i tmp = SkAlphaMul_lsx(vpmc_a, vmask256);
222
                    vscale = __lsx_vsub_h(__lsx_vreplgr2vr_h(256), tmp);
223
                } else {
224
                    vscale = __lsx_vsub_h(__lsx_vreplgr2vr_h(256), vmask);
225
                }
226
227
                b = SkAlphaMul_lsx(vpmc_b, vmask256) + SkAlphaMul_lsx(b, vscale);
228
                g = SkAlphaMul_lsx(vpmc_g, vmask256) + SkAlphaMul_lsx(g, vscale);
229
                r = SkAlphaMul_lsx(vpmc_r, vmask256) + SkAlphaMul_lsx(r, vscale);
230
                a = SkAlphaMul_lsx(vpmc_a, vmask256) + SkAlphaMul_lsx(a, vscale);
231
232
                bg = __lsx_vor_v(b, __lsx_vslli_h(g, 8));  // bgbg bgbg BGBG BGBG
233
                ra = __lsx_vor_v(r, __lsx_vslli_h(a, 8));  // rara rara RARA RARA
234
                lo = __lsx_vilvl_h(ra, bg);                // bgra bgra bgra bgra
235
                hi = __lsx_vilvh_h(ra, bg);                // BGRA BGRA BGRA BGRA
236
237
                __lsx_vst(lo, device, 0);
238
                __lsx_vst(hi, device, 16);
239
240
                mask += 8;
241
                device += 8;
242
                w -= 8;
243
            }
244
245
            while (w--) {
246
                unsigned aa = *mask++;
247
                if (isColor) {
248
                    *device = SkBlendARGB32(pmc, *device, aa);
249
                } else {
250
                    *device = SkAlphaMulQ(pmc, SkAlpha255To256(aa))
251
                        + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa));
252
                }
253
                device += 1;
254
            }
255
256
            device = (uint32_t *)((char*)device + dstRB);
257
            mask += mask_adjust;
258
259
        } while (--height != 0);
260
    }
261
262
    static void blit_mask_d32_a8_general(SkPMColor* dst, size_t dstRB,
263
                                         const SkAlpha* mask, size_t maskRB,
264
                                         SkColor color, int w, int h) {
265
        D32_A8_Opaque_Color_lsx<true>(dst, dstRB, mask, maskRB, color, w, h);
266
    }
267
268
    static void blit_mask_d32_a8_opaque(SkPMColor* dst, size_t dstRB,
269
                                         const SkAlpha* mask, size_t maskRB,
270
                                         SkColor color, int w, int h) {
271
        D32_A8_Opaque_Color_lsx<false>(dst, dstRB, mask, maskRB, color, w, h);
272
    }
273
274
    // Same as _opaque, but assumes color == SK_ColorBLACK, a very common and even simpler case.
275
    static void blit_mask_d32_a8_black(SkPMColor* dst, size_t dstRB,
276
                                       const SkAlpha* maskPtr, size_t maskRB,
277
                                       int width, int height) {
278
        SkPMColor* SK_RESTRICT device = (SkPMColor*)dst;
279
        const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr;
280
281
        // Nine patch may set maskRB to 0 to blit the same row repeatedly.
282
        ptrdiff_t mask_adjust = (ptrdiff_t)maskRB - width;
283
        dstRB -= (width << 2);
284
        const __m128i zeros = __lsx_vldi(0);
285
        __m128i planar = __lsx_vldi(0);
286
        planar = __lsx_vinsgr2vr_d(planar, 0x0d0905010c080400, 0);
287
        planar = __lsx_vinsgr2vr_d(planar, 0x0f0b07030e0a0602, 1);
288
289
        do {
290
            int w = width;
291
            while (w >= 8) {
292
                __m128i vmask = __lsx_vld(mask, 0);
293
                vmask = __lsx_vilvl_b(zeros, vmask);
294
                __m128i vscale = __lsx_vsub_h(__lsx_vreplgr2vr_h(256), vmask);
295
                __m128i lo = __lsx_vld(device, 0);         // bgra bgra bgra bgra
296
                __m128i hi = __lsx_vld(device, 16);        // BGRA BGRA BGRA BGRA
297
                lo = __lsx_vshuf_b(zeros, lo, planar);     // bbbb gggg rrrr aaaa
298
                hi = __lsx_vshuf_b(zeros, hi, planar);     // BBBB GGGG RRRR AAAA
299
                __m128i bg = __lsx_vilvl_w(hi, lo),        // bbbb BBBB gggg GGGG
300
                        ra = __lsx_vilvh_w(hi, lo);        // rrrr RRRR aaaa AAAA
301
302
                __m128i b = __lsx_vilvl_b(zeros, bg),      // _b_b _b_b _B_B _B_B
303
                        g = __lsx_vilvh_b(zeros, bg),      // _g_g _g_g _G_G _G_G
304
                        r = __lsx_vilvl_b(zeros, ra),      // _r_r _r_r _R_R _R_R
305
                        a = __lsx_vilvh_b(zeros, ra);      // _a_a _a_a _A_A _A_A
306
307
                b = SkAlphaMul_lsx(b, vscale);
308
                g = SkAlphaMul_lsx(g, vscale);
309
                r = SkAlphaMul_lsx(r, vscale);
310
                a = SkAlphaMul_lsx(a, vscale);
311
312
                a += vmask;
313
314
                bg = __lsx_vor_v(b, __lsx_vslli_h(g, 8));  // bgbg bgbg BGBG BGBG
315
                ra = __lsx_vor_v(r, __lsx_vslli_h(a, 8));  // rara rara RARA RARA
316
                lo = __lsx_vilvl_h(ra, bg);                // bgra bgra bgra bgra
317
                hi = __lsx_vilvh_h(ra, bg);                // BGRA BGRA BGRA BGRA
318
319
                __lsx_vst(lo, device, 0);
320
                __lsx_vst(hi, device, 16);
321
322
                mask += 8;
323
                device += 8;
324
                w -= 8;
325
            }
326
327
            while (w-- > 0) {
328
                unsigned aa = *mask++;
329
                *device = (aa << SK_A32_SHIFT)
330
                            + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa));
331
                device += 1;
332
            }
333
334
            device = (uint32_t*)((char*)device + dstRB);
335
            mask += mask_adjust;
336
337
        } while (--height != 0);
338
    }
339
340
#else
341
    static void blit_mask_d32_a8_general(SkPMColor* dst, size_t dstRB,
342
                                         const SkAlpha* mask, size_t maskRB,
343
2.32k
                                         SkColor color, int w, int h) {
344
2.32k
        auto s = Sk4px::DupPMColor(SkPreMultiplyColor(color));
345
397k
        auto fn = [&](const Sk4px& d, const Sk4px& aa) {
346
            //  = (s + d(1-sa))aa + d(1-aa)
347
            //  = s*aa + d(1-sa*aa)
348
397k
            auto left  = s.approxMulDiv255(aa),
349
397k
                 right = d.approxMulDiv255(left.alphas().inv());
350
397k
            return left + right;  // This does not overflow (exhaustively checked).
351
397k
        };
SkBlitMask_opts.cpp:sse2::blit_mask_d32_a8_general(unsigned int*, unsigned long, unsigned char const*, unsigned long, unsigned int, int, int)::$_0::operator()(Sk4px const&, Sk4px const&) const
Line
Count
Source
345
397k
        auto fn = [&](const Sk4px& d, const Sk4px& aa) {
346
            //  = (s + d(1-sa))aa + d(1-aa)
347
            //  = s*aa + d(1-sa*aa)
348
397k
            auto left  = s.approxMulDiv255(aa),
349
397k
                 right = d.approxMulDiv255(left.alphas().inv());
350
397k
            return left + right;  // This does not overflow (exhaustively checked).
351
397k
        };
Unexecuted instantiation: SkBlitMask_opts_ssse3.cpp:ssse3::blit_mask_d32_a8_general(unsigned int*, unsigned long, unsigned char const*, unsigned long, unsigned int, int, int)::$_0::operator()(Sk4px const&, Sk4px const&) const
352
31.4k
        while (h --> 0) {
353
29.1k
            Sk4px::MapDstAlpha(w, dst, mask, fn);
354
29.1k
            dst  +=  dstRB / sizeof(*dst);
355
29.1k
            mask += maskRB / sizeof(*mask);
356
29.1k
        }
357
2.32k
    }
SkBlitMask_opts.cpp:sse2::blit_mask_d32_a8_general(unsigned int*, unsigned long, unsigned char const*, unsigned long, unsigned int, int, int)
Line
Count
Source
343
2.32k
                                         SkColor color, int w, int h) {
344
2.32k
        auto s = Sk4px::DupPMColor(SkPreMultiplyColor(color));
345
2.32k
        auto fn = [&](const Sk4px& d, const Sk4px& aa) {
346
            //  = (s + d(1-sa))aa + d(1-aa)
347
            //  = s*aa + d(1-sa*aa)
348
2.32k
            auto left  = s.approxMulDiv255(aa),
349
2.32k
                 right = d.approxMulDiv255(left.alphas().inv());
350
2.32k
            return left + right;  // This does not overflow (exhaustively checked).
351
2.32k
        };
352
31.4k
        while (h --> 0) {
353
29.1k
            Sk4px::MapDstAlpha(w, dst, mask, fn);
354
29.1k
            dst  +=  dstRB / sizeof(*dst);
355
29.1k
            mask += maskRB / sizeof(*mask);
356
29.1k
        }
357
2.32k
    }
Unexecuted instantiation: SkBlitMask_opts_ssse3.cpp:ssse3::blit_mask_d32_a8_general(unsigned int*, unsigned long, unsigned char const*, unsigned long, unsigned int, int, int)
358
359
    // As above, but made slightly simpler by requiring that color is opaque.
360
    static void blit_mask_d32_a8_opaque(SkPMColor* dst, size_t dstRB,
361
                                        const SkAlpha* mask, size_t maskRB,
362
2.14k
                                        SkColor color, int w, int h) {
363
2.14k
        SkASSERT(SkColorGetA(color) == 0xFF);
364
2.14k
        auto s = Sk4px::DupPMColor(SkPreMultiplyColor(color));
365
1.13M
        auto fn = [&](const Sk4px& d, const Sk4px& aa) {
366
            //  = (s + d(1-sa))aa + d(1-aa)
367
            //  = s*aa + d(1-sa*aa)
368
            //   ~~~>
369
            //  = s*aa + d(1-aa)
370
1.13M
            return s.approxMulDiv255(aa) + d.approxMulDiv255(aa.inv());
371
1.13M
        };
SkBlitMask_opts.cpp:sse2::blit_mask_d32_a8_opaque(unsigned int*, unsigned long, unsigned char const*, unsigned long, unsigned int, int, int)::$_0::operator()(Sk4px const&, Sk4px const&) const
Line
Count
Source
365
1.13M
        auto fn = [&](const Sk4px& d, const Sk4px& aa) {
366
            //  = (s + d(1-sa))aa + d(1-aa)
367
            //  = s*aa + d(1-sa*aa)
368
            //   ~~~>
369
            //  = s*aa + d(1-aa)
370
1.13M
            return s.approxMulDiv255(aa) + d.approxMulDiv255(aa.inv());
371
1.13M
        };
Unexecuted instantiation: SkBlitMask_opts_ssse3.cpp:ssse3::blit_mask_d32_a8_opaque(unsigned int*, unsigned long, unsigned char const*, unsigned long, unsigned int, int, int)::$_0::operator()(Sk4px const&, Sk4px const&) const
372
57.4k
        while (h --> 0) {
373
55.2k
            Sk4px::MapDstAlpha(w, dst, mask, fn);
374
55.2k
            dst  +=  dstRB / sizeof(*dst);
375
55.2k
            mask += maskRB / sizeof(*mask);
376
55.2k
        }
377
2.14k
    }
SkBlitMask_opts.cpp:sse2::blit_mask_d32_a8_opaque(unsigned int*, unsigned long, unsigned char const*, unsigned long, unsigned int, int, int)
Line
Count
Source
362
2.14k
                                        SkColor color, int w, int h) {
363
2.14k
        SkASSERT(SkColorGetA(color) == 0xFF);
364
2.14k
        auto s = Sk4px::DupPMColor(SkPreMultiplyColor(color));
365
2.14k
        auto fn = [&](const Sk4px& d, const Sk4px& aa) {
366
            //  = (s + d(1-sa))aa + d(1-aa)
367
            //  = s*aa + d(1-sa*aa)
368
            //   ~~~>
369
            //  = s*aa + d(1-aa)
370
2.14k
            return s.approxMulDiv255(aa) + d.approxMulDiv255(aa.inv());
371
2.14k
        };
372
57.4k
        while (h --> 0) {
373
55.2k
            Sk4px::MapDstAlpha(w, dst, mask, fn);
374
55.2k
            dst  +=  dstRB / sizeof(*dst);
375
55.2k
            mask += maskRB / sizeof(*mask);
376
55.2k
        }
377
2.14k
    }
Unexecuted instantiation: SkBlitMask_opts_ssse3.cpp:ssse3::blit_mask_d32_a8_opaque(unsigned int*, unsigned long, unsigned char const*, unsigned long, unsigned int, int, int)
Unexecuted instantiation: SkBlitMask_opts_ssse3.cpp:ssse3::blit_mask_d32_a8_opaque(unsigned int*, unsigned long, unsigned char const*, unsigned long, unsigned int, int, int)
378
379
    // Same as _opaque, but assumes color == SK_ColorBLACK, a very common and even simpler case.
380
    static void blit_mask_d32_a8_black(SkPMColor* dst, size_t dstRB,
381
                                       const SkAlpha* mask, size_t maskRB,
382
11.3k
                                       int w, int h) {
383
6.63M
        auto fn = [](const Sk4px& d, const Sk4px& aa) {
384
            //   = (s + d(1-sa))aa + d(1-aa)
385
            //   = s*aa + d(1-sa*aa)
386
            //   ~~~>
387
            // a = 1*aa + d(1-1*aa) = aa + d(1-aa)
388
            // c = 0*aa + d(1-1*aa) =      d(1-aa)
389
6.63M
            return (aa & Sk4px(skvx::byte16{0,0,0,255, 0,0,0,255, 0,0,0,255, 0,0,0,255}))
390
6.63M
                 + d.approxMulDiv255(aa.inv());
391
6.63M
        };
SkBlitMask_opts.cpp:sse2::blit_mask_d32_a8_black(unsigned int*, unsigned long, unsigned char const*, unsigned long, int, int)::$_0::operator()(Sk4px const&, Sk4px const&) const
Line
Count
Source
383
6.63M
        auto fn = [](const Sk4px& d, const Sk4px& aa) {
384
            //   = (s + d(1-sa))aa + d(1-aa)
385
            //   = s*aa + d(1-sa*aa)
386
            //   ~~~>
387
            // a = 1*aa + d(1-1*aa) = aa + d(1-aa)
388
            // c = 0*aa + d(1-1*aa) =      d(1-aa)
389
6.63M
            return (aa & Sk4px(skvx::byte16{0,0,0,255, 0,0,0,255, 0,0,0,255, 0,0,0,255}))
390
6.63M
                 + d.approxMulDiv255(aa.inv());
391
6.63M
        };
Unexecuted instantiation: SkBlitMask_opts_ssse3.cpp:ssse3::blit_mask_d32_a8_black(unsigned int*, unsigned long, unsigned char const*, unsigned long, int, int)::$_0::operator()(Sk4px const&, Sk4px const&) const
392
313k
        while (h --> 0) {
393
302k
            Sk4px::MapDstAlpha(w, dst, mask, fn);
394
302k
            dst  +=  dstRB / sizeof(*dst);
395
302k
            mask += maskRB / sizeof(*mask);
396
302k
        }
397
11.3k
    }
SkBlitMask_opts.cpp:sse2::blit_mask_d32_a8_black(unsigned int*, unsigned long, unsigned char const*, unsigned long, int, int)
Line
Count
Source
382
11.3k
                                       int w, int h) {
383
11.3k
        auto fn = [](const Sk4px& d, const Sk4px& aa) {
384
            //   = (s + d(1-sa))aa + d(1-aa)
385
            //   = s*aa + d(1-sa*aa)
386
            //   ~~~>
387
            // a = 1*aa + d(1-1*aa) = aa + d(1-aa)
388
            // c = 0*aa + d(1-1*aa) =      d(1-aa)
389
11.3k
            return (aa & Sk4px(skvx::byte16{0,0,0,255, 0,0,0,255, 0,0,0,255, 0,0,0,255}))
390
11.3k
                 + d.approxMulDiv255(aa.inv());
391
11.3k
        };
392
313k
        while (h --> 0) {
393
302k
            Sk4px::MapDstAlpha(w, dst, mask, fn);
394
302k
            dst  +=  dstRB / sizeof(*dst);
395
302k
            mask += maskRB / sizeof(*mask);
396
302k
        }
397
11.3k
    }
Unexecuted instantiation: SkBlitMask_opts_ssse3.cpp:ssse3::blit_mask_d32_a8_black(unsigned int*, unsigned long, unsigned char const*, unsigned long, int, int)
398
#endif
399
400
/*not static*/ inline void blit_mask_d32_a8(SkPMColor* dst, size_t dstRB,
401
                                            const SkAlpha* mask, size_t maskRB,
402
15.8k
                                            SkColor color, int w, int h) {
403
15.8k
    if (color == SK_ColorBLACK) {
404
11.3k
        blit_mask_d32_a8_black(dst, dstRB, mask, maskRB, w, h);
405
11.3k
    } else if (SkColorGetA(color) == 0xFF) {
406
2.14k
        blit_mask_d32_a8_opaque(dst, dstRB, mask, maskRB, color, w, h);
407
2.32k
    } else {
408
2.32k
        blit_mask_d32_a8_general(dst, dstRB, mask, maskRB, color, w, h);
409
2.32k
    }
410
15.8k
}
sse2::blit_mask_d32_a8(unsigned int*, unsigned long, unsigned char const*, unsigned long, unsigned int, int, int)
Line
Count
Source
402
15.8k
                                            SkColor color, int w, int h) {
403
15.8k
    if (color == SK_ColorBLACK) {
404
11.3k
        blit_mask_d32_a8_black(dst, dstRB, mask, maskRB, w, h);
405
11.3k
    } else if (SkColorGetA(color) == 0xFF) {
406
2.14k
        blit_mask_d32_a8_opaque(dst, dstRB, mask, maskRB, color, w, h);
407
2.32k
    } else {
408
2.32k
        blit_mask_d32_a8_general(dst, dstRB, mask, maskRB, color, w, h);
409
2.32k
    }
410
15.8k
}
Unexecuted instantiation: ssse3::blit_mask_d32_a8(unsigned int*, unsigned long, unsigned char const*, unsigned long, unsigned int, int, int)
411
412
}  // namespace SK_OPTS_NS
413
414
#endif//SkBlitMask_opts_DEFINED