Coverage Report

Created: 2025-08-29 06:20

/src/libass/libass/ass_bitmap_engine.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (C) 2021-2022 libass contributors
3
 *
4
 * This file is part of libass.
5
 *
6
 * Permission to use, copy, modify, and distribute this software for any
7
 * purpose with or without fee is hereby granted, provided that the above
8
 * copyright notice and this permission notice appear in all copies.
9
 *
10
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17
 */
18
19
#include "config.h"
20
#include "ass_compat.h"
21
22
#include <stdbool.h>
23
24
#include "ass_bitmap_engine.h"
25
#include "x86/cpuid.h"
26
27
28
/*
 * Declares the four rasterizer tile routines for one tile size and one
 * implementation suffix: ass_fill_solid_tile<tile_size>_<suffix>,
 * ass_fill_halfplane_tile<...>, ass_fill_generic_tile<...> and
 * ass_merge_tile<...>, using the FillSolidTileFunc, FillHalfplaneTileFunc,
 * FillGenericTileFunc and MergeTileFunc types respectively.
 */
#define RASTERIZER_PROTOTYPES(tile_size, suffix) \
29
23.5k
    FillSolidTileFunc     ass_fill_solid_tile     ## tile_size ## _ ## suffix; \
30
23.5k
    FillHalfplaneTileFunc ass_fill_halfplane_tile ## tile_size ## _ ## suffix; \
31
23.5k
    FillGenericTileFunc   ass_fill_generic_tile   ## tile_size ## _ ## suffix; \
32
23.5k
    MergeTileFunc         ass_merge_tile          ## tile_size ## _ ## suffix;
33
34
/*
 * Assigns engine.<name> to the 32-pixel tile variant when
 * ASS_FLAG_LARGE_TILES is set in `mask`, otherwise to the 16-pixel variant.
 * Relies on variables named `engine` and `mask` being in scope at the
 * expansion site, and on both tile variants having been declared.
 */
#define RASTERIZER_FUNCTION(name, suffix) \
35
47.1k
    engine.name = mask & ASS_FLAG_LARGE_TILES ? \
36
47.1k
        ass_ ## name ## _tile32_ ## suffix : \
37
47.1k
        ass_ ## name ## _tile16_ ## suffix;
38
39
/*
 * Installs all four rasterizer entries (fill_solid, fill_halfplane,
 * fill_generic, merge) of `engine` for the given implementation suffix,
 * one RASTERIZER_FUNCTION expansion per entry.
 */
#define RASTERIZER_FUNCTIONS(suffix) \
40
11.7k
    RASTERIZER_FUNCTION(fill_solid,     suffix) \
41
11.7k
    RASTERIZER_FUNCTION(fill_halfplane, suffix) \
42
11.7k
    RASTERIZER_FUNCTION(fill_generic,   suffix) \
43
11.7k
    RASTERIZER_FUNCTION(merge,          suffix)
44
45
46
/*
 * Declares the size-independent bitmap routines for one implementation
 * suffix: ass_add_bitmaps_<suffix> and ass_imul_bitmaps_<suffix>
 * (BitmapBlendFunc), ass_mul_bitmaps_<suffix> (BitmapMulFunc) and
 * ass_be_blur_<suffix> (BeBlurFunc).
 */
#define GENERIC_PROTOTYPES(suffix) \
47
11.7k
    BitmapBlendFunc ass_add_bitmaps_  ## suffix; \
48
11.7k
    BitmapBlendFunc ass_imul_bitmaps_ ## suffix; \
49
11.7k
    BitmapMulFunc   ass_mul_bitmaps_  ## suffix; \
50
11.7k
    BeBlurFunc      ass_be_blur_      ## suffix;
51
52
/*
 * Assigns engine.<name> = ass_<name>_<suffix>. Relies on a variable named
 * `engine` being in scope at the expansion site.
 */
#define GENERIC_FUNCTION(name, suffix) \
53
47.1k
    engine.name = ass_ ## name ## _ ## suffix;
54
55
/*
 * Installs the four size-independent entries (add_bitmaps, imul_bitmaps,
 * mul_bitmaps, be_blur) of `engine` for the given implementation suffix.
 */
#define GENERIC_FUNCTIONS(suffix) \
56
11.7k
    GENERIC_FUNCTION(add_bitmaps,  suffix) \
57
11.7k
    GENERIC_FUNCTION(imul_bitmaps, suffix) \
58
11.7k
    GENERIC_FUNCTION(mul_bitmaps,  suffix) \
59
11.7k
    GENERIC_FUNCTION(be_blur,      suffix)
60
61
62
/*
 * Expands to a comma-separated list of five identifiers,
 * ass_blur4_<suffix> through ass_blur8_<suffix>. There is no trailing
 * separator, so the caller supplies the type in front and the terminating
 * semicolon (see its use as a declarator list in BLUR_PROTOTYPES).
 */
#define PARAM_BLUR_SET(suffix) \
63
    ass_blur4_ ## suffix, \
64
    ass_blur5_ ## suffix, \
65
    ass_blur6_ ## suffix, \
66
    ass_blur7_ ## suffix, \
67
    ass_blur8_ ## suffix
68
69
/*
 * Declares the blur-pipeline routines for one stripe width and one
 * implementation suffix: stripe unpack/pack (Convert8to16Func /
 * Convert16to8Func), the horizontal and vertical shrink/expand filters
 * (FilterFunc), and the five parametric blur filters per direction
 * (ParamFilterFunc, ass_blur4_..ass_blur8_ for horz and vert).
 */
#define BLUR_PROTOTYPES(stripe_width, suffix) \
70
23.5k
    Convert8to16Func ass_stripe_unpack  ## stripe_width ## _ ## suffix; \
71
23.5k
    Convert16to8Func ass_stripe_pack    ## stripe_width ## _ ## suffix; \
72
23.5k
    FilterFunc       ass_shrink_horz    ## stripe_width ## _ ## suffix; \
73
23.5k
    FilterFunc       ass_shrink_vert    ## stripe_width ## _ ## suffix; \
74
23.5k
    FilterFunc       ass_expand_horz    ## stripe_width ## _ ## suffix; \
75
23.5k
    FilterFunc       ass_expand_vert    ## stripe_width ## _ ## suffix; \
76
23.5k
    ParamFilterFunc PARAM_BLUR_SET(horz ## stripe_width ## _ ## suffix); \
77
23.5k
    ParamFilterFunc PARAM_BLUR_SET(vert ## stripe_width ## _ ## suffix);
78
79
/*
 * Assigns engine.<name> = ass_<name><alignment>_<suffix>, i.e. the variant
 * of a blur-pipeline routine for the given stripe width. Relies on a
 * variable named `engine` being in scope at the expansion site.
 */
#define BLUR_FUNCTION(name, alignment, suffix) \
80
70.7k
    engine.name = ass_ ## name ## alignment ## _ ## suffix;
81
82
/*
 * Fills the five-entry table engine.blur_<dir>[0..4] with
 * ass_blur4_<dir><alignment>_<suffix> .. ass_blur8_<dir><alignment>_<suffix>,
 * so index i holds the "blur(i + 4)" filter for that direction.
 */
#define PARAM_BLUR_FUNCTION(dir, alignment, suffix) \
83
23.5k
    engine.blur_ ## dir[0] = ass_blur4_ ## dir ## alignment ## _ ## suffix; \
84
23.5k
    engine.blur_ ## dir[1] = ass_blur5_ ## dir ## alignment ## _ ## suffix; \
85
23.5k
    engine.blur_ ## dir[2] = ass_blur6_ ## dir ## alignment ## _ ## suffix; \
86
23.5k
    engine.blur_ ## dir[3] = ass_blur7_ ## dir ## alignment ## _ ## suffix; \
87
23.5k
    engine.blur_ ## dir[4] = ass_blur8_ ## dir ## alignment ## _ ## suffix;
88
89
/*
 * Installs every blur-pipeline entry of `engine` (stripe unpack/pack,
 * shrink/expand in both directions, and both parametric blur tables) for the
 * given stripe width and implementation suffix, then records align_order_
 * in engine.align_order. In this file align_order_ is always passed as 4 for
 * alignment 16 and 5 for alignment 32.
 */
#define BLUR_FUNCTIONS(align_order_, alignment, suffix) \
90
11.7k
    BLUR_FUNCTION(stripe_unpack, alignment, suffix) \
91
11.7k
    BLUR_FUNCTION(stripe_pack,   alignment, suffix) \
92
11.7k
    BLUR_FUNCTION(shrink_horz,   alignment, suffix) \
93
11.7k
    BLUR_FUNCTION(shrink_vert,   alignment, suffix) \
94
11.7k
    BLUR_FUNCTION(expand_horz,   alignment, suffix) \
95
11.7k
    BLUR_FUNCTION(expand_vert,   alignment, suffix) \
96
11.7k
    PARAM_BLUR_FUNCTION(horz, alignment, suffix) \
97
11.7k
    PARAM_BLUR_FUNCTION(vert, alignment, suffix) \
98
11.7k
    engine.align_order = align_order_;
99
100
101
/*
 * Declares the complete routine set for one implementation suffix:
 * rasterizer routines for both 16- and 32-pixel tiles, the size-independent
 * bitmap routines, and the blur routines for the single stripe width given
 * by `alignment`.
 */
#define ALL_PROTOTYPES(alignment, suffix) \
102
11.7k
    RASTERIZER_PROTOTYPES(16, suffix) \
103
11.7k
    RASTERIZER_PROTOTYPES(32, suffix) \
104
11.7k
    GENERIC_PROTOTYPES(suffix) \
105
11.7k
    BLUR_PROTOTYPES(alignment, suffix)
106
107
/*
 * Populates every function entry of `engine` (rasterizer, generic bitmap
 * operations, blur pipeline) plus engine.align_order from a single
 * implementation suffix. Requires `engine` and `mask` in scope and the
 * matching prototypes to have been declared beforehand.
 */
#define ALL_FUNCTIONS(align_order_, alignment, suffix) \
108
11.7k
    RASTERIZER_FUNCTIONS(suffix) \
109
11.7k
    GENERIC_FUNCTIONS(suffix) \
110
11.7k
    BLUR_FUNCTIONS(align_order_, alignment, suffix)
111
112
113
/**
 * Detects the SIMD capabilities of the running CPU.
 *
 * On x86 builds with assembly enabled (CONFIG_ASM && ARCH_X86) the CPUID
 * instruction is queried for SSE2, SSSE3 and AVX2; on AArch64 builds NEON is
 * reported unconditionally. In every other configuration only
 * ASS_CPU_FLAG_NONE is produced.
 *
 * @param mask  bit mask ANDed with the detected flags before returning, so
 *              the caller can restrict which features may be reported
 * @return      detected ASS_CPU_FLAG_* bits that are also set in mask
 */
unsigned ass_get_cpu_flags(unsigned mask)
114
0
{
115
0
    unsigned flags = ASS_CPU_FLAG_NONE;
116
117
#if CONFIG_ASM && ARCH_X86
118
119
    // Without CPUID nothing can be probed; report no features.
    if (!ass_has_cpuid())
120
        return flags & mask;
121
122
    // Leaf 0: the value left in eax is used as the highest supported leaf.
    uint32_t eax = 0, ebx, ecx, edx;
123
    ass_get_cpuid(&eax, &ebx, &ecx, &edx);
124
    uint32_t max_leaf = eax;
125
126
    // Set only when the CPU advertises AVX *and* the OS saves XMM/YMM state.
    bool avx = false;
127
    if (max_leaf >= 1) {
128
        eax = 1;
129
        ass_get_cpuid(&eax, &ebx, &ecx, &edx);
130
        if (edx & (1 << 26)) {  // SSE2
131
            flags |= ASS_CPU_FLAG_X86_SSE2;
            // SSSE3 is reported only together with SSE2 and SSE3.
132
            if (ecx & (1 << 0) &&  // SSE3
133
                ecx & (1 << 9))    // SSSE3
134
                    flags |= ASS_CPU_FLAG_X86_SSSE3;
135
        }
136
137
        if (ecx & (1 << 27) &&  // OSXSAVE
138
            ecx & (1 << 28)) {  // AVX
139
            uint32_t xcr0l, xcr0h;
            // Read XCR0 (index 0) to check which register state the OS enabled.
140
            ass_get_xgetbv(0, &xcr0l, &xcr0h);
141
            if (xcr0l & (1 << 1) &&  // XSAVE for XMM
142
                xcr0l & (1 << 2))    // XSAVE for YMM
143
                    avx = true;
144
        }
145
    }
146
147
    if (max_leaf >= 7) {
148
        eax = 7;
        // NOTE(review): ecx still holds leaf 1 output here; presumably
        // ass_get_cpuid selects subleaf 0 itself -- confirm in x86/cpuid.
149
        ass_get_cpuid(&eax, &ebx, &ecx, &edx);
150
        if (avx && ebx & (1 << 5))  // AVX2
151
            flags |= ASS_CPU_FLAG_X86_AVX2;
152
    }
153
154
#endif
155
156
#if ARCH_AARCH64
    // No runtime probing on AArch64: NEON is reported unconditionally.
157
    flags = ASS_CPU_FLAG_ARM_NEON;
158
#endif
159
160
0
    return flags & mask;
161
0
}
162
163
/**
 * Builds a BitmapEngine function table for the current CPU.
 *
 * With CONFIG_ASM the best available implementation is chosen from the flags
 * returned by ass_get_cpu_flags(mask): AVX2, then SSE2 (with selected
 * routines overridden by SSSE3 versions) on x86, or NEON on AArch64.
 * Otherwise, or when none of those flags is set, the C implementation is
 * used.
 *
 * @param mask  passed to ass_get_cpu_flags() to limit the usable CPU
 *              features; also carries ASS_FLAG_LARGE_TILES (selects 32- vs
 *              16-pixel rasterizer tiles) and ASS_FLAG_WIDE_STRIPE (selects
 *              the 32-wide C blur routines; only consulted on the C path,
 *              because the assembly paths return early)
 * @return      the populated engine, by value
 */
BitmapEngine ass_bitmap_engine_init(unsigned mask)
164
11.7k
{
    // Block-scope declarations of the C routines referenced further down.
165
11.7k
    ALL_PROTOTYPES(16, c)
166
11.7k
    BLUR_PROTOTYPES(32, c)
167
11.7k
    BitmapEngine engine = {0};
    // tile_order is 5 with large tiles and 4 otherwise, matching the
    // tile32/tile16 choice made in RASTERIZER_FUNCTION.
168
11.7k
    engine.tile_order = mask & ASS_FLAG_LARGE_TILES ? 5 : 4;
169
170
#if CONFIG_ASM
171
    unsigned flags = ass_get_cpu_flags(mask);
172
#if ARCH_X86
173
    if (flags & ASS_CPU_FLAG_X86_AVX2) {
174
        ALL_PROTOTYPES(32, avx2)
175
        ALL_FUNCTIONS(5, 32, avx2)
176
        return engine;
177
    } else if (flags & ASS_CPU_FLAG_X86_SSE2) {
178
        ALL_PROTOTYPES(16, sse2)
179
        ALL_FUNCTIONS(4, 16, sse2)
180
        if (flags & ASS_CPU_FLAG_X86_SSSE3) {
            // Start from the full SSE2 set and replace only these entries
            // with their SSSE3 versions.
181
            ALL_PROTOTYPES(16, ssse3)
182
            RASTERIZER_FUNCTION(fill_generic, ssse3)
183
            GENERIC_FUNCTION(be_blur, ssse3)
184
            BLUR_FUNCTION(shrink_horz, 16, ssse3)
185
            BLUR_FUNCTION(expand_horz, 16, ssse3)
186
            PARAM_BLUR_FUNCTION(horz, 16, ssse3)
187
        }
188
        return engine;
189
    }
190
#elif ARCH_AARCH64
191
    if (flags & ASS_CPU_FLAG_ARM_NEON) {
192
        ALL_PROTOTYPES(16, neon)
193
        ALL_FUNCTIONS(4, 16, neon)
194
        return engine;
195
    }
196
#endif
197
#endif
198
    // Fallback: C implementation with 16-wide stripes by default.
199
11.7k
    ALL_FUNCTIONS(4, 16, c)
200
11.7k
    if (mask & ASS_FLAG_WIDE_STRIPE) {
        // Replace only the blur pipeline (and align_order) with the 32-wide
        // C routines.
201
0
        BLUR_FUNCTIONS(5, 32, c)
202
0
    }
203
11.7k
    return engine;
204
11.7k
}