/src/libass/libass/c/rasterizer_template.h
Line | Count | Source |
1 | | /* |
2 | | * Copyright (C) 2014-2022 libass contributors |
3 | | * |
4 | | * This file is part of libass. |
5 | | * |
6 | | * Permission to use, copy, modify, and distribute this software for any |
7 | | * purpose with or without fee is hereby granted, provided that the above |
8 | | * copyright notice and this permission notice appear in all copies. |
9 | | * |
10 | | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
11 | | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
12 | | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
13 | | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
14 | | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
15 | | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
16 | | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
17 | | */ |
18 | | |
19 | | #if TILE_SIZE == 16 |
20 | 2.75M | #define SUFFIX(name) name ## 16_c |
21 | 25.4M | #define TILE_ORDER 4 |
22 | | #elif TILE_SIZE == 32 |
23 | 0 | #define SUFFIX(name) name ## 32_c |
24 | 0 | #define TILE_ORDER 5 |
25 | | #else |
26 | | #error Unsupported tile size |
27 | | #endif |
28 | | |
29 | 4.52M | #define FULL_VALUE (1 << (14 - TILE_ORDER)) |
30 | | #define RESCALE_AB(ab, scale) \ |
31 | 3.53M | (((ab) * (int64_t) (scale) + ((int64_t) 1 << (45 + TILE_ORDER))) >> (46 + TILE_ORDER)) |
32 | | #define RESCALE_C(c, scale) \ |
33 | 1.76M | (((int32_t) ((c) >> (7 + TILE_ORDER)) * (int64_t) (scale) + ((int64_t) 1 << 44)) >> 45) |
34 | | |
35 | | |
36 | | void SUFFIX(ass_fill_solid_tile)(uint8_t *buf, ptrdiff_t stride, int set) |
37 | 25.1k | { |
38 | 25.1k | ASSUME(!((uintptr_t) buf % ALIGNMENT) && !(stride % ALIGNMENT)); |
39 | | |
40 | 25.1k | uint8_t value = set ? 255 : 0; |
41 | 426k | for (int y = 0; y < TILE_SIZE; y++) { |
42 | 6.83M | for (int x = 0; x < TILE_SIZE; x++) |
43 | 6.42M | buf[x] = value; |
44 | 401k | buf += stride; |
45 | 401k | } |
46 | 25.1k | } Line | Count | Source | 37 | 25.1k | { | 38 | 25.1k | ASSUME(!((uintptr_t) buf % ALIGNMENT) && !(stride % ALIGNMENT)); | 39 | | | 40 | 25.1k | uint8_t value = set ? 255 : 0; | 41 | 426k | for (int y = 0; y < TILE_SIZE; y++) { | 42 | 6.83M | for (int x = 0; x < TILE_SIZE; x++) | 43 | 6.42M | buf[x] = value; | 44 | 401k | buf += stride; | 45 | 401k | } | 46 | 25.1k | } |
Unexecuted instantiation: ass_fill_solid_tile32_c |
47 | | |
48 | | |
49 | | /* |
50 | | * Halfplane Filling Functions |
51 | | * |
52 | | * Fill pixels with antialiasing corresponding to equation |
53 | | * A * x + B * y < C, where |
54 | | * x, y - offset of pixel center from bottom-left, |
55 | | * A = a * scale, B = b * scale, C = c * scale / 64. |
56 | | * |
57 | | * Normalization of coefficients prior to the call: |
58 | | * max(abs(a), abs(b)) * scale = 1 << 61 |
59 | | * |
60 | | * Used Algorithm |
61 | | * Let |
62 | | * max_ab = max(abs(A), abs(B)), |
63 | | * min_ab = min(abs(A), abs(B)), |
64 | | * CC = C - A * x - B * y, then |
65 | | * result = (clamp((CC - min_ab / 4) / max_ab) + |
66 | | * clamp((CC + min_ab / 4) / max_ab) + |
67 | | * 1) / 2, |
68 | | * where clamp(Z) = max(-0.5, min(0.5, Z)). |
69 | | */ |
70 | | |
71 | | void SUFFIX(ass_fill_halfplane_tile)(uint8_t *buf, ptrdiff_t stride, |
72 | | int32_t a, int32_t b, int64_t c, int32_t scale) |
73 | 397 | { |
74 | 397 | ASSUME(!((uintptr_t) buf % ALIGNMENT) && !(stride % ALIGNMENT)); |
75 | | |
76 | 397 | int16_t aa = RESCALE_AB(a, scale), bb = RESCALE_AB(b, scale); |
77 | 397 | int16_t cc = RESCALE_C(c, scale) + FULL_VALUE / 2 - ((aa + bb) >> 1); |
78 | | |
79 | 397 | int16_t abs_a = aa < 0 ? -aa : aa; |
80 | 397 | int16_t abs_b = bb < 0 ? -bb : bb; |
81 | 397 | int16_t delta = (FFMIN(abs_a, abs_b) + 2) >> 2; |
82 | | |
83 | 397 | int16_t va1[TILE_SIZE], va2[TILE_SIZE]; |
84 | 6.74k | for (int x = 0; x < TILE_SIZE; x++) { |
85 | 6.35k | va1[x] = aa * x - delta; |
86 | 6.35k | va2[x] = aa * x + delta; |
87 | 6.35k | } |
88 | | |
89 | 6.74k | for (int y = 0; y < TILE_SIZE; y++) { |
90 | 107k | for (int x = 0; x < TILE_SIZE; x++) { |
91 | 101k | int16_t c1 = cc - va1[x]; |
92 | 101k | int16_t c2 = cc - va2[x]; |
93 | 101k | c1 = FFMINMAX(c1, 0, FULL_VALUE); |
94 | 101k | c2 = FFMINMAX(c2, 0, FULL_VALUE); |
95 | 101k | int16_t res = (c1 + c2) >> (7 - TILE_ORDER); |
96 | 101k | buf[x] = FFMIN(res, 255); |
97 | 101k | } |
98 | 6.35k | buf += stride; |
99 | 6.35k | cc -= bb; |
100 | 6.35k | } |
101 | 397 | } ass_fill_halfplane_tile16_c Line | Count | Source | 73 | 397 | { | 74 | 397 | ASSUME(!((uintptr_t) buf % ALIGNMENT) && !(stride % ALIGNMENT)); | 75 | | | 76 | 397 | int16_t aa = RESCALE_AB(a, scale), bb = RESCALE_AB(b, scale); | 77 | 397 | int16_t cc = RESCALE_C(c, scale) + FULL_VALUE / 2 - ((aa + bb) >> 1); | 78 | | | 79 | 397 | int16_t abs_a = aa < 0 ? -aa : aa; | 80 | 397 | int16_t abs_b = bb < 0 ? -bb : bb; | 81 | 397 | int16_t delta = (FFMIN(abs_a, abs_b) + 2) >> 2; | 82 | | | 83 | 397 | int16_t va1[TILE_SIZE], va2[TILE_SIZE]; | 84 | 6.74k | for (int x = 0; x < TILE_SIZE; x++) { | 85 | 6.35k | va1[x] = aa * x - delta; | 86 | 6.35k | va2[x] = aa * x + delta; | 87 | 6.35k | } | 88 | | | 89 | 6.74k | for (int y = 0; y < TILE_SIZE; y++) { | 90 | 107k | for (int x = 0; x < TILE_SIZE; x++) { | 91 | 101k | int16_t c1 = cc - va1[x]; | 92 | 101k | int16_t c2 = cc - va2[x]; | 93 | 101k | c1 = FFMINMAX(c1, 0, FULL_VALUE); | 94 | 101k | c2 = FFMINMAX(c2, 0, FULL_VALUE); | 95 | 101k | int16_t res = (c1 + c2) >> (7 - TILE_ORDER); | 96 | 101k | buf[x] = FFMIN(res, 255); | 97 | 101k | } | 98 | 6.35k | buf += stride; | 99 | 6.35k | cc -= bb; | 100 | 6.35k | } | 101 | 397 | } |
Unexecuted instantiation: ass_fill_halfplane_tile32_c |
102 | | |
103 | | |
104 | | /* |
105 | | * Generic Filling Functions |
106 | | * |
107 | | * Used Algorithm |
108 | | * Construct trapezoid from each polyline segment and its projection onto the left side of the tile. |
109 | | * Render that trapezoid into internal buffer with additive blending and correct sign. |
110 | | * Store clamped absolute value from internal buffer into result buffer. |
111 | | */ |
112 | | |
113 | | // Render top/bottom line of the trapezoid with antialiasing |
114 | | static inline void SUFFIX(update_border_line)(int16_t res[TILE_SIZE], |
115 | | int16_t abs_a, const int16_t va[TILE_SIZE], |
116 | | int16_t b, int16_t abs_b, |
117 | | int16_t c, int up, int dn) |
118 | 2.75M | { |
119 | 2.75M | int16_t size = dn - up; |
120 | 2.75M | int16_t w = FULL_VALUE + (size << (8 - TILE_ORDER)) - abs_a; |
121 | 2.75M | w = FFMIN(w, FULL_VALUE) << (2 * TILE_ORDER - 5); |
122 | | |
123 | 2.75M | int16_t dc_b = abs_b * (int32_t) size >> 6; |
124 | 2.75M | int16_t dc = (FFMIN(abs_a, dc_b) + 2) >> 2; |
125 | | |
126 | 2.75M | int16_t base = (int32_t) b * (int16_t) (up + dn) >> 7; |
127 | 2.75M | int16_t offs1 = size - ((base + dc) * (int32_t) w >> 16); |
128 | 2.75M | int16_t offs2 = size - ((base - dc) * (int32_t) w >> 16); |
129 | | |
130 | 2.75M | size <<= 1; |
131 | 46.8M | for (int x = 0; x < TILE_SIZE; x++) { |
132 | 44.0M | int16_t cw = (c - va[x]) * (int32_t) w >> 16; |
133 | 44.0M | int16_t c1 = cw + offs1; |
134 | 44.0M | int16_t c2 = cw + offs2; |
135 | 44.0M | c1 = FFMINMAX(c1, 0, size); |
136 | 44.0M | c2 = FFMINMAX(c2, 0, size); |
137 | 44.0M | res[x] += c1 + c2; |
138 | 44.0M | } |
139 | 2.75M | } c_rasterizer.c:update_border_line16_c Line | Count | Source | 118 | 2.75M | { | 119 | 2.75M | int16_t size = dn - up; | 120 | 2.75M | int16_t w = FULL_VALUE + (size << (8 - TILE_ORDER)) - abs_a; | 121 | 2.75M | w = FFMIN(w, FULL_VALUE) << (2 * TILE_ORDER - 5); | 122 | | | 123 | 2.75M | int16_t dc_b = abs_b * (int32_t) size >> 6; | 124 | 2.75M | int16_t dc = (FFMIN(abs_a, dc_b) + 2) >> 2; | 125 | | | 126 | 2.75M | int16_t base = (int32_t) b * (int16_t) (up + dn) >> 7; | 127 | 2.75M | int16_t offs1 = size - ((base + dc) * (int32_t) w >> 16); | 128 | 2.75M | int16_t offs2 = size - ((base - dc) * (int32_t) w >> 16); | 129 | | | 130 | 2.75M | size <<= 1; | 131 | 46.8M | for (int x = 0; x < TILE_SIZE; x++) { | 132 | 44.0M | int16_t cw = (c - va[x]) * (int32_t) w >> 16; | 133 | 44.0M | int16_t c1 = cw + offs1; | 134 | 44.0M | int16_t c2 = cw + offs2; | 135 | 44.0M | c1 = FFMINMAX(c1, 0, size); | 136 | 44.0M | c2 = FFMINMAX(c2, 0, size); | 137 | 44.0M | res[x] += c1 + c2; | 138 | 44.0M | } | 139 | 2.75M | } |
Unexecuted instantiation: c_rasterizer.c:update_border_line32_c |
140 | | |
141 | | void SUFFIX(ass_fill_generic_tile)(uint8_t *restrict buf, ptrdiff_t stride, |
142 | | const struct segment *restrict line, size_t n_lines, |
143 | | int winding) |
144 | 56.3k | { |
145 | 56.3k | ASSUME(!((uintptr_t) buf % ALIGNMENT) && !(stride % ALIGNMENT)); |
146 | | |
147 | 56.3k | int16_t res[TILE_SIZE][TILE_SIZE] = {0}; |
148 | 56.3k | int16_t delta[TILE_SIZE + 2] = {0}; |
149 | | |
150 | 56.3k | const struct segment *end = line + n_lines; |
151 | 2.15M | for (; line != end; ++line) { |
152 | 2.09M | assert(line->y_min >= 0 && line->y_min < 64 << TILE_ORDER); |
153 | 2.09M | assert(line->y_max > 0 && line->y_max <= 64 << TILE_ORDER); |
154 | 2.09M | assert(line->y_min <= line->y_max); |
155 | | |
156 | 2.09M | int16_t up_delta = line->flags & SEGFLAG_DN ? 4 : 0; |
157 | 2.09M | int16_t dn_delta = up_delta; |
158 | 2.09M | if (!line->x_min && (line->flags & SEGFLAG_EXACT_LEFT)) dn_delta ^= 4; |
159 | 2.09M | if (line->flags & SEGFLAG_UL_DR) { |
160 | 1.06M | int16_t tmp = up_delta; |
161 | 1.06M | up_delta = dn_delta; |
162 | 1.06M | dn_delta = tmp; |
163 | 1.06M | } |
164 | | |
165 | 2.09M | int up = line->y_min >> 6, dn = line->y_max >> 6; |
166 | 2.09M | int16_t up_pos = line->y_min & 63; |
167 | 2.09M | int16_t up_delta1 = up_delta * up_pos; |
168 | 2.09M | int16_t dn_pos = line->y_max & 63; |
169 | 2.09M | int16_t dn_delta1 = dn_delta * dn_pos; |
170 | 2.09M | delta[up + 1] -= up_delta1; |
171 | 2.09M | delta[up] -= (up_delta << 6) - up_delta1; |
172 | 2.09M | delta[dn + 1] += dn_delta1; |
173 | 2.09M | delta[dn] += (dn_delta << 6) - dn_delta1; |
174 | 2.09M | if (line->y_min == line->y_max) |
175 | 330k | continue; |
176 | | |
177 | 1.76M | int16_t a = RESCALE_AB(line->a, line->scale); |
178 | 1.76M | int16_t b = RESCALE_AB(line->b, line->scale); |
179 | 1.76M | int16_t c = RESCALE_C(line->c, line->scale) - (a >> 1) - b * up; |
180 | | |
181 | 1.76M | int16_t va[TILE_SIZE]; |
182 | 30.0M | for (int x = 0; x < TILE_SIZE; x++) |
183 | 28.2M | va[x] = a * x; |
184 | 1.76M | int16_t abs_a = a < 0 ? -a : a; |
185 | 1.76M | int16_t abs_b = b < 0 ? -b : b; |
186 | 1.76M | int16_t dc = (FFMIN(abs_a, abs_b) + 2) >> 2; |
187 | 1.76M | int16_t base = FULL_VALUE / 2 - (b >> 1); |
188 | 1.76M | int16_t dc1 = base + dc; |
189 | 1.76M | int16_t dc2 = base - dc; |
190 | | |
191 | 1.76M | if (up_pos) { |
192 | 1.73M | if (dn == up) { |
193 | 709k | SUFFIX(update_border_line)(res[up], abs_a, va, b, abs_b, c, up_pos, dn_pos); |
194 | 709k | continue; |
195 | 709k | } |
196 | 1.02M | SUFFIX(update_border_line)(res[up], abs_a, va, b, abs_b, c, up_pos, 64); |
197 | 1.02M | up++; |
198 | 1.02M | c -= b; |
199 | 1.02M | } |
200 | 1.46M | for (int y = up; y < dn; y++) { |
201 | 6.91M | for (int x = 0; x < TILE_SIZE; x++) { |
202 | 6.51M | int16_t c1 = c - va[x] + dc1; |
203 | 6.51M | int16_t c2 = c - va[x] + dc2; |
204 | 6.51M | c1 = FFMINMAX(c1, 0, FULL_VALUE); |
205 | 6.51M | c2 = FFMINMAX(c2, 0, FULL_VALUE); |
206 | 6.51M | res[y][x] += (c1 + c2) >> (7 - TILE_ORDER); |
207 | 6.51M | } |
208 | 407k | c -= b; |
209 | 407k | } |
210 | 1.05M | if (dn_pos) |
211 | 1.02M | SUFFIX(update_border_line)(res[dn], abs_a, va, b, abs_b, c, 0, dn_pos); |
212 | 1.05M | } |
213 | | |
214 | 56.3k | int16_t cur = 256 * (int8_t) winding; |
215 | 958k | for (int y = 0; y < TILE_SIZE; y++) { |
216 | 901k | cur += delta[y]; |
217 | 15.3M | for (int x = 0; x < TILE_SIZE; x++) { |
218 | 14.4M | int16_t val = res[y][x] + cur, neg_val = -val; |
219 | 14.4M | val = (val > neg_val ? val : neg_val); |
220 | 14.4M | buf[x] = FFMIN(val, 255); |
221 | 14.4M | } |
222 | 901k | buf += stride; |
223 | 901k | } |
224 | 56.3k | } ass_fill_generic_tile16_c Line | Count | Source | 144 | 56.3k | { | 145 | 56.3k | ASSUME(!((uintptr_t) buf % ALIGNMENT) && !(stride % ALIGNMENT)); | 146 | | | 147 | 56.3k | int16_t res[TILE_SIZE][TILE_SIZE] = {0}; | 148 | 56.3k | int16_t delta[TILE_SIZE + 2] = {0}; | 149 | | | 150 | 56.3k | const struct segment *end = line + n_lines; | 151 | 2.15M | for (; line != end; ++line) { | 152 | 2.09M | assert(line->y_min >= 0 && line->y_min < 64 << TILE_ORDER); | 153 | 2.09M | assert(line->y_max > 0 && line->y_max <= 64 << TILE_ORDER); | 154 | 2.09M | assert(line->y_min <= line->y_max); | 155 | | | 156 | 2.09M | int16_t up_delta = line->flags & SEGFLAG_DN ? 4 : 0; | 157 | 2.09M | int16_t dn_delta = up_delta; | 158 | 2.09M | if (!line->x_min && (line->flags & SEGFLAG_EXACT_LEFT)) dn_delta ^= 4; | 159 | 2.09M | if (line->flags & SEGFLAG_UL_DR) { | 160 | 1.06M | int16_t tmp = up_delta; | 161 | 1.06M | up_delta = dn_delta; | 162 | 1.06M | dn_delta = tmp; | 163 | 1.06M | } | 164 | | | 165 | 2.09M | int up = line->y_min >> 6, dn = line->y_max >> 6; | 166 | 2.09M | int16_t up_pos = line->y_min & 63; | 167 | 2.09M | int16_t up_delta1 = up_delta * up_pos; | 168 | 2.09M | int16_t dn_pos = line->y_max & 63; | 169 | 2.09M | int16_t dn_delta1 = dn_delta * dn_pos; | 170 | 2.09M | delta[up + 1] -= up_delta1; | 171 | 2.09M | delta[up] -= (up_delta << 6) - up_delta1; | 172 | 2.09M | delta[dn + 1] += dn_delta1; | 173 | 2.09M | delta[dn] += (dn_delta << 6) - dn_delta1; | 174 | 2.09M | if (line->y_min == line->y_max) | 175 | 330k | continue; | 176 | | | 177 | 1.76M | int16_t a = RESCALE_AB(line->a, line->scale); | 178 | 1.76M | int16_t b = RESCALE_AB(line->b, line->scale); | 179 | 1.76M | int16_t c = RESCALE_C(line->c, line->scale) - (a >> 1) - b * up; | 180 | | | 181 | 1.76M | int16_t va[TILE_SIZE]; | 182 | 30.0M | for (int x = 0; x < TILE_SIZE; x++) | 183 | 28.2M | va[x] = a * x; | 184 | 1.76M | int16_t abs_a = a < 0 ? -a : a; | 185 | 1.76M | int16_t abs_b = b < 0 ? 
-b : b; | 186 | 1.76M | int16_t dc = (FFMIN(abs_a, abs_b) + 2) >> 2; | 187 | 1.76M | int16_t base = FULL_VALUE / 2 - (b >> 1); | 188 | 1.76M | int16_t dc1 = base + dc; | 189 | 1.76M | int16_t dc2 = base - dc; | 190 | | | 191 | 1.76M | if (up_pos) { | 192 | 1.73M | if (dn == up) { | 193 | 709k | SUFFIX(update_border_line)(res[up], abs_a, va, b, abs_b, c, up_pos, dn_pos); | 194 | 709k | continue; | 195 | 709k | } | 196 | 1.02M | SUFFIX(update_border_line)(res[up], abs_a, va, b, abs_b, c, up_pos, 64); | 197 | 1.02M | up++; | 198 | 1.02M | c -= b; | 199 | 1.02M | } | 200 | 1.46M | for (int y = up; y < dn; y++) { | 201 | 6.91M | for (int x = 0; x < TILE_SIZE; x++) { | 202 | 6.51M | int16_t c1 = c - va[x] + dc1; | 203 | 6.51M | int16_t c2 = c - va[x] + dc2; | 204 | 6.51M | c1 = FFMINMAX(c1, 0, FULL_VALUE); | 205 | 6.51M | c2 = FFMINMAX(c2, 0, FULL_VALUE); | 206 | 6.51M | res[y][x] += (c1 + c2) >> (7 - TILE_ORDER); | 207 | 6.51M | } | 208 | 407k | c -= b; | 209 | 407k | } | 210 | 1.05M | if (dn_pos) | 211 | 1.02M | SUFFIX(update_border_line)(res[dn], abs_a, va, b, abs_b, c, 0, dn_pos); | 212 | 1.05M | } | 213 | | | 214 | 56.3k | int16_t cur = 256 * (int8_t) winding; | 215 | 958k | for (int y = 0; y < TILE_SIZE; y++) { | 216 | 901k | cur += delta[y]; | 217 | 15.3M | for (int x = 0; x < TILE_SIZE; x++) { | 218 | 14.4M | int16_t val = res[y][x] + cur, neg_val = -val; | 219 | 14.4M | val = (val > neg_val ? val : neg_val); | 220 | 14.4M | buf[x] = FFMIN(val, 255); | 221 | 14.4M | } | 222 | 901k | buf += stride; | 223 | 901k | } | 224 | 56.3k | } |
Unexecuted instantiation: ass_fill_generic_tile32_c |
225 | | |
226 | | |
227 | | void SUFFIX(ass_merge_tile)(uint8_t *restrict buf, ptrdiff_t stride, |
228 | | const uint8_t *restrict tile) |
229 | 18.9k | { |
230 | 18.9k | ASSUME(!((uintptr_t) buf % ALIGNMENT) && !(stride % ALIGNMENT)); |
231 | | |
232 | 321k | for (int y = 0; y < TILE_SIZE; y++) { |
233 | 5.14M | for (int x = 0; x < TILE_SIZE; x++) |
234 | 4.84M | buf[x] = FFMAX(buf[x], tile[x]); |
235 | 302k | buf += stride; |
236 | 302k | tile += TILE_SIZE; |
237 | 302k | } |
238 | 18.9k | } Line | Count | Source | 229 | 18.9k | { | 230 | 18.9k | ASSUME(!((uintptr_t) buf % ALIGNMENT) && !(stride % ALIGNMENT)); | 231 | | | 232 | 321k | for (int y = 0; y < TILE_SIZE; y++) { | 233 | 5.14M | for (int x = 0; x < TILE_SIZE; x++) | 234 | 4.84M | buf[x] = FFMAX(buf[x], tile[x]); | 235 | 302k | buf += stride; | 236 | 302k | tile += TILE_SIZE; | 237 | 302k | } | 238 | 18.9k | } |
Unexecuted instantiation: ass_merge_tile32_c |
239 | | |
240 | | |
/* Drop the per-instantiation macros so that the template can be included
 * again with a different TILE_SIZE. */
#undef RESCALE_C
#undef RESCALE_AB
#undef FULL_VALUE
#undef TILE_ORDER
#undef SUFFIX