/src/libass/libass/ass_bitmap_engine.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (C) 2021-2022 libass contributors |
3 | | * |
4 | | * This file is part of libass. |
5 | | * |
6 | | * Permission to use, copy, modify, and distribute this software for any |
7 | | * purpose with or without fee is hereby granted, provided that the above |
8 | | * copyright notice and this permission notice appear in all copies. |
9 | | * |
10 | | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
11 | | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
12 | | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
13 | | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
14 | | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
15 | | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
16 | | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
17 | | */ |
18 | | |
19 | | #include "config.h" |
20 | | #include "ass_compat.h" |
21 | | |
22 | | #include <stdbool.h> |
23 | | |
24 | | #include "ass_bitmap_engine.h" |
25 | | #include "x86/cpuid.h" |
26 | | |
27 | | /* Rasterizer tile functions: declaration helper and BitmapEngine assignment helpers. */ |
28 | | #define RASTERIZER_PROTOTYPES(tile_size, suffix) /* declares the four ass_*_tile<tile_size>_<suffix> names */ \ |
29 | 23.5k | FillSolidTileFunc ass_fill_solid_tile ## tile_size ## _ ## suffix; \ |
30 | 23.5k | FillHalfplaneTileFunc ass_fill_halfplane_tile ## tile_size ## _ ## suffix; \ |
31 | 23.5k | FillGenericTileFunc ass_fill_generic_tile ## tile_size ## _ ## suffix; \ |
32 | 23.5k | MergeTileFunc ass_merge_tile ## tile_size ## _ ## suffix; |
33 | | /* Sets engine.<name> to the tile32 variant when mask has ASS_FLAG_LARGE_TILES, else to the tile16 variant; expects engine and mask in scope. */ |
34 | | #define RASTERIZER_FUNCTION(name, suffix) \ |
35 | 47.1k | engine.name = mask & ASS_FLAG_LARGE_TILES ? \ |
36 | 47.1k | ass_ ## name ## _tile32_ ## suffix : \ |
37 | 47.1k | ass_ ## name ## _tile16_ ## suffix; |
38 | | /* Applies RASTERIZER_FUNCTION to fill_solid, fill_halfplane, fill_generic and merge. */ |
39 | | #define RASTERIZER_FUNCTIONS(suffix) \ |
40 | 11.7k | RASTERIZER_FUNCTION(fill_solid, suffix) \ |
41 | 11.7k | RASTERIZER_FUNCTION(fill_halfplane, suffix) \ |
42 | 11.7k | RASTERIZER_FUNCTION(fill_generic, suffix) \ |
43 | 11.7k | RASTERIZER_FUNCTION(merge, suffix) |
44 | | |
45 | | /* Declares ass_add_bitmaps_, ass_imul_bitmaps_, ass_mul_bitmaps_ and ass_be_blur_ with the given suffix appended. */ |
46 | | #define GENERIC_PROTOTYPES(suffix) \ |
47 | 11.7k | BitmapBlendFunc ass_add_bitmaps_ ## suffix; \ |
48 | 11.7k | BitmapBlendFunc ass_imul_bitmaps_ ## suffix; \ |
49 | 11.7k | BitmapMulFunc ass_mul_bitmaps_ ## suffix; \ |
50 | 11.7k | BeBlurFunc ass_be_blur_ ## suffix; |
51 | | /* Sets engine.<name> to ass_<name>_<suffix>; expects engine in scope at the expansion site. */ |
52 | | #define GENERIC_FUNCTION(name, suffix) \ |
53 | 47.1k | engine.name = ass_ ## name ## _ ## suffix; |
54 | | /* Applies GENERIC_FUNCTION to the four names declared by GENERIC_PROTOTYPES. */ |
55 | | #define GENERIC_FUNCTIONS(suffix) \ |
56 | 11.7k | GENERIC_FUNCTION(add_bitmaps, suffix) \ |
57 | 11.7k | GENERIC_FUNCTION(imul_bitmaps, suffix) \ |
58 | 11.7k | GENERIC_FUNCTION(mul_bitmaps, suffix) \ |
59 | 11.7k | GENERIC_FUNCTION(be_blur, suffix) |
60 | | |
61 | | /* Expands to the comma-separated names ass_blur4_<suffix> through ass_blur8_<suffix>. */ |
62 | | #define PARAM_BLUR_SET(suffix) \ |
63 | | ass_blur4_ ## suffix, \ |
64 | | ass_blur5_ ## suffix, \ |
65 | | ass_blur6_ ## suffix, \ |
66 | | ass_blur7_ ## suffix, \ |
67 | | ass_blur8_ ## suffix |
68 | | /* Declares the stripe unpack/pack, shrink/expand and blur4..blur8 horz/vert names for one stripe width and suffix. */ |
69 | | #define BLUR_PROTOTYPES(stripe_width, suffix) \ |
70 | 23.5k | Convert8to16Func ass_stripe_unpack ## stripe_width ## _ ## suffix; \ |
71 | 23.5k | Convert16to8Func ass_stripe_pack ## stripe_width ## _ ## suffix; \ |
72 | 23.5k | FilterFunc ass_shrink_horz ## stripe_width ## _ ## suffix; \ |
73 | 23.5k | FilterFunc ass_shrink_vert ## stripe_width ## _ ## suffix; \ |
74 | 23.5k | FilterFunc ass_expand_horz ## stripe_width ## _ ## suffix; \ |
75 | 23.5k | FilterFunc ass_expand_vert ## stripe_width ## _ ## suffix; \ |
76 | 23.5k | ParamFilterFunc PARAM_BLUR_SET(horz ## stripe_width ## _ ## suffix); \ |
77 | 23.5k | ParamFilterFunc PARAM_BLUR_SET(vert ## stripe_width ## _ ## suffix); |
78 | | /* Sets engine.<name> to ass_<name><alignment>_<suffix>; expects engine in scope at the expansion site. */ |
79 | | #define BLUR_FUNCTION(name, alignment, suffix) \ |
80 | 70.7k | engine.name = ass_ ## name ## alignment ## _ ## suffix; |
81 | | /* Fills engine.blur_<dir>[0..4] with the blur4..blur8 variants, in that order. */ |
82 | | #define PARAM_BLUR_FUNCTION(dir, alignment, suffix) \ |
83 | 23.5k | engine.blur_ ## dir[0] = ass_blur4_ ## dir ## alignment ## _ ## suffix; \ |
84 | 23.5k | engine.blur_ ## dir[1] = ass_blur5_ ## dir ## alignment ## _ ## suffix; \ |
85 | 23.5k | engine.blur_ ## dir[2] = ass_blur6_ ## dir ## alignment ## _ ## suffix; \ |
86 | 23.5k | engine.blur_ ## dir[3] = ass_blur7_ ## dir ## alignment ## _ ## suffix; \ |
87 | 23.5k | engine.blur_ ## dir[4] = ass_blur8_ ## dir ## alignment ## _ ## suffix; |
88 | | /* Installs every blur-related entry for one alignment/suffix pair and stores align_order_ in engine.align_order. */ |
89 | | #define BLUR_FUNCTIONS(align_order_, alignment, suffix) \ |
90 | 11.7k | BLUR_FUNCTION(stripe_unpack, alignment, suffix) \ |
91 | 11.7k | BLUR_FUNCTION(stripe_pack, alignment, suffix) \ |
92 | 11.7k | BLUR_FUNCTION(shrink_horz, alignment, suffix) \ |
93 | 11.7k | BLUR_FUNCTION(shrink_vert, alignment, suffix) \ |
94 | 11.7k | BLUR_FUNCTION(expand_horz, alignment, suffix) \ |
95 | 11.7k | BLUR_FUNCTION(expand_vert, alignment, suffix) \ |
96 | 11.7k | PARAM_BLUR_FUNCTION(horz, alignment, suffix) \ |
97 | 11.7k | PARAM_BLUR_FUNCTION(vert, alignment, suffix) \ |
98 | 11.7k | engine.align_order = align_order_; |
99 | | |
100 | | /* Declares the rasterizer names for tile sizes 16 and 32, the generic names, and the blur names for one stripe width. */ |
101 | | #define ALL_PROTOTYPES(alignment, suffix) \ |
102 | 11.7k | RASTERIZER_PROTOTYPES(16, suffix) \ |
103 | 11.7k | RASTERIZER_PROTOTYPES(32, suffix) \ |
104 | 11.7k | GENERIC_PROTOTYPES(suffix) \ |
105 | 11.7k | BLUR_PROTOTYPES(alignment, suffix) |
106 | | /* Installs the rasterizer, generic and blur entries of one suffix into engine, including engine.align_order. */ |
107 | | #define ALL_FUNCTIONS(align_order_, alignment, suffix) \ |
108 | 11.7k | RASTERIZER_FUNCTIONS(suffix) \ |
109 | 11.7k | GENERIC_FUNCTIONS(suffix) \ |
110 | 11.7k | BLUR_FUNCTIONS(align_order_, alignment, suffix) |
111 | | |
112 | | /* Returns the ASS_CPU_FLAG_* bits detected for the build target, ANDed with mask. */ |
113 | | unsigned ass_get_cpu_flags(unsigned mask) |
114 | 0 | { |
115 | 0 | unsigned flags = ASS_CPU_FLAG_NONE; |
116 |
|
117 | | #if CONFIG_ASM && ARCH_X86 |
118 | | /* Without CPUID nothing is probed; flags still holds ASS_CPU_FLAG_NONE at this point. */ |
119 | | if (!ass_has_cpuid()) |
120 | | return flags & mask; |
121 | | /* Query with eax = 0; the returned eax is kept as max_leaf and gates the leaf 1 and leaf 7 queries. */ |
122 | | uint32_t eax = 0, ebx, ecx, edx; |
123 | | ass_get_cpuid(&eax, &ebx, &ecx, &edx); |
124 | | uint32_t max_leaf = eax; |
125 | | /* avx becomes true only when the CPUID bits and the XGETBV bits below all check out; it gates AVX2. */ |
126 | | bool avx = false; |
127 | | if (max_leaf >= 1) { |
128 | | eax = 1; |
129 | | ass_get_cpuid(&eax, &ebx, &ecx, &edx); |
130 | | if (edx & (1 << 26)) { // SSE2 |
131 | | flags |= ASS_CPU_FLAG_X86_SSE2; |
132 | | if (ecx & (1 << 0) && // SSE3 |
133 | | ecx & (1 << 9)) // SSSE3 |
134 | | flags |= ASS_CPU_FLAG_X86_SSSE3; |
135 | | } |
136 | | /* ass_get_xgetbv(0, ...) is called only when both the OSXSAVE and AVX bits are set in ecx. */ |
137 | | if (ecx & (1 << 27) && // OSXSAVE |
138 | | ecx & (1 << 28)) { // AVX |
139 | | uint32_t xcr0l, xcr0h; |
140 | | ass_get_xgetbv(0, &xcr0l, &xcr0h); |
141 | | if (xcr0l & (1 << 1) && // XSAVE for XMM |
142 | | xcr0l & (1 << 2)) // XSAVE for YMM |
143 | | avx = true; |
144 | | } |
145 | | } |
146 | | /* AVX2 is reported only if leaf 7 is available, avx was confirmed above and ebx bit 5 is set. */ |
147 | | if (max_leaf >= 7) { |
148 | | eax = 7; |
149 | | ass_get_cpuid(&eax, &ebx, &ecx, &edx); |
150 | | if (avx && ebx & (1 << 5)) // AVX2 |
151 | | flags |= ASS_CPU_FLAG_X86_AVX2; |
152 | | } |
153 | | |
154 | | #endif |
155 |
|
156 | | #if ARCH_AARCH64 |
157 | | flags = ASS_CPU_FLAG_ARM_NEON; /* assigned unconditionally; no runtime probe is done in this function */ |
158 | | #endif |
159 |
|
160 | 0 | return flags & mask; |
161 | 0 | } |
162 | | /* Builds and returns a BitmapEngine by value, choosing the function variants from the flags in mask. */ |
163 | | BitmapEngine ass_bitmap_engine_init(unsigned mask) |
164 | 11.7k | { |
165 | 11.7k | ALL_PROTOTYPES(16, c) |
166 | 11.7k | BLUR_PROTOTYPES(32, c) /* 32-wide C blur variants, installed below for ASS_FLAG_WIDE_STRIPE */ |
167 | 11.7k | BitmapEngine engine = {0}; |
168 | 11.7k | engine.tile_order = mask & ASS_FLAG_LARGE_TILES ? 5 : 4; /* 5 with ASS_FLAG_LARGE_TILES, otherwise 4 */ |
169 | | /* With CONFIG_ASM, a matching branch below installs its own functions and returns early. */ |
170 | | #if CONFIG_ASM |
171 | | unsigned flags = ass_get_cpu_flags(mask); |
172 | | #if ARCH_X86 |
173 | | if (flags & ASS_CPU_FLAG_X86_AVX2) { |
174 | | ALL_PROTOTYPES(32, avx2) |
175 | | ALL_FUNCTIONS(5, 32, avx2) |
176 | | return engine; |
177 | | } else if (flags & ASS_CPU_FLAG_X86_SSE2) { |
178 | | ALL_PROTOTYPES(16, sse2) |
179 | | ALL_FUNCTIONS(4, 16, sse2) |
180 | | if (flags & ASS_CPU_FLAG_X86_SSSE3) { /* replaces a subset of the SSE2 entries set just above */ |
181 | | ALL_PROTOTYPES(16, ssse3) |
182 | | RASTERIZER_FUNCTION(fill_generic, ssse3) |
183 | | GENERIC_FUNCTION(be_blur, ssse3) |
184 | | BLUR_FUNCTION(shrink_horz, 16, ssse3) |
185 | | BLUR_FUNCTION(expand_horz, 16, ssse3) |
186 | | PARAM_BLUR_FUNCTION(horz, 16, ssse3) |
187 | | } |
188 | | return engine; |
189 | | } |
190 | | #elif ARCH_AARCH64 |
191 | | if (flags & ASS_CPU_FLAG_ARM_NEON) { |
192 | | ALL_PROTOTYPES(16, neon) |
193 | | ALL_FUNCTIONS(4, 16, neon) |
194 | | return engine; |
195 | | } |
196 | | #endif |
197 | | #endif |
198 | | /* C fallback: 16-wide stripes with align_order 4; ASS_FLAG_WIDE_STRIPE is only consulted on this path. */ |
199 | 11.7k | ALL_FUNCTIONS(4, 16, c) |
200 | 11.7k | if (mask & ASS_FLAG_WIDE_STRIPE) { |
201 | 0 | BLUR_FUNCTIONS(5, 32, c) |
202 | 0 | } |
203 | 11.7k | return engine; |
204 | 11.7k | } |