/src/libass/libass/c/blur_template.h
Line | Count | Source |
1 | | /* |
2 | | * Copyright (C) 2015-2022 libass contributors |
3 | | * |
4 | | * This file is part of libass. |
5 | | * |
6 | | * Permission to use, copy, modify, and distribute this software for any |
7 | | * purpose with or without fee is hereby granted, provided that the above |
8 | | * copyright notice and this permission notice appear in all copies. |
9 | | * |
10 | | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
11 | | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
12 | | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
13 | | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
14 | | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
15 | | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
16 | | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
17 | | */ |
18 | | |
/* Number of 16-bit samples in one stripe row: half of ALIGNMENT. */
#define STRIPE_WIDTH (ALIGNMENT / 2)
/*
 * Low-bit mask used to round a width up to a whole number of stripes,
 * as in (w + STRIPE_MASK) & ~STRIPE_MASK.
 * NOTE(review): this only works if STRIPE_WIDTH is a power of two - confirm
 * against the ALIGNMENT values the includer defines.
 */
#define STRIPE_MASK (STRIPE_WIDTH - 1)
21 | | |
22 | | inline static void SUFFIX(copy_line)(int16_t *buf, const int16_t *ptr, size_t offs, size_t size) |
23 | 17.4k | { |
24 | 17.4k | memcpy(buf, get_line(ptr, offs, size), STRIPE_WIDTH * sizeof(buf[0])); |
25 | 17.4k | } Line | Count | Source | 23 | 17.4k | { | 24 | 17.4k | memcpy(buf, get_line(ptr, offs, size), STRIPE_WIDTH * sizeof(buf[0])); | 25 | 17.4k | } |
Unexecuted instantiation: c_blur.c:copy_line32_c |
26 | | |
/* Unsuffixed shorthand so the filters in this template can call copy_line() directly. */
#define copy_line SUFFIX(copy_line)
28 | | |
29 | | /* |
30 | | * Unpack/Pack Functions |
31 | | * |
32 | | * Convert between regular 8-bit bitmap and internal format. |
33 | | * Internal image is stored as a set of vertical stripes of size [STRIPE_WIDTH x height]. |
34 | | * Each pixel is represented as a 16-bit integer in the range [0, 0x4000]. |
35 | | */ |
36 | | |
37 | | void SUFFIX(ass_stripe_unpack)(int16_t *restrict dst, const uint8_t *restrict src, |
38 | | ptrdiff_t src_stride, size_t width, size_t height) |
39 | 7 | { |
40 | 7 | ASSUME(!((uintptr_t) dst % ALIGNMENT)); |
41 | 7 | ASSUME(!((uintptr_t) src % ALIGNMENT) && !(src_stride % ALIGNMENT)); |
42 | 7 | ASSUME(width > 0 && height > 0); |
43 | | |
44 | 123 | for (size_t y = 0; y < height; y++) { |
45 | 116 | int16_t *ptr = dst; |
46 | 468 | for (size_t x = 0; x < width; x += STRIPE_WIDTH) { |
47 | 3.16k | for (int k = 0; k < STRIPE_WIDTH; k++) |
48 | 2.81k | ptr[k] = (uint16_t) (((src[x + k] << 7) | (src[x + k] >> 1)) + 1) >> 1; |
49 | | //ptr[k] = (0x4000 * src[x + k] + 127) / 255; |
50 | 352 | ptr += STRIPE_WIDTH * height; |
51 | 352 | } |
52 | 116 | dst += STRIPE_WIDTH; |
53 | 116 | src += src_stride; |
54 | 116 | } |
55 | 7 | } Line | Count | Source | 39 | 7 | { | 40 | 7 | ASSUME(!((uintptr_t) dst % ALIGNMENT)); | 41 | 7 | ASSUME(!((uintptr_t) src % ALIGNMENT) && !(src_stride % ALIGNMENT)); | 42 | 7 | ASSUME(width > 0 && height > 0); | 43 | | | 44 | 123 | for (size_t y = 0; y < height; y++) { | 45 | 116 | int16_t *ptr = dst; | 46 | 468 | for (size_t x = 0; x < width; x += STRIPE_WIDTH) { | 47 | 3.16k | for (int k = 0; k < STRIPE_WIDTH; k++) | 48 | 2.81k | ptr[k] = (uint16_t) (((src[x + k] << 7) | (src[x + k] >> 1)) + 1) >> 1; | 49 | | //ptr[k] = (0x4000 * src[x + k] + 127) / 255; | 50 | 352 | ptr += STRIPE_WIDTH * height; | 51 | 352 | } | 52 | 116 | dst += STRIPE_WIDTH; | 53 | 116 | src += src_stride; | 54 | 116 | } | 55 | 7 | } |
Unexecuted instantiation: ass_stripe_unpack32_c |
56 | | |
57 | | void SUFFIX(ass_stripe_pack)(uint8_t *restrict dst, ptrdiff_t dst_stride, |
58 | | const int16_t *restrict src, size_t width, size_t height) |
59 | 7 | { |
60 | 7 | ASSUME(!((uintptr_t) dst % ALIGNMENT) && !(dst_stride % ALIGNMENT)); |
61 | 7 | ASSUME(!((uintptr_t) src % ALIGNMENT)); |
62 | 7 | ASSUME(width > 0 && height > 0); |
63 | | |
64 | 419 | for (size_t x = 0; x < width; x += STRIPE_WIDTH) { |
65 | 412 | uint8_t *ptr = dst; |
66 | 319k | for (size_t y = 0; y < height; y++) { |
67 | 318k | const int16_t *dither = dither_line + 16 * (y & 1); |
68 | 2.86M | for (int k = 0; k < STRIPE_WIDTH; k++) |
69 | 2.54M | ptr[k] = (uint16_t) (src[k] - (src[k] >> 8) + dither[k]) >> 6; |
70 | | //ptr[k] = (255 * src[k] + 0x1FFF) / 0x4000; |
71 | 318k | ptr += dst_stride; |
72 | 318k | src += STRIPE_WIDTH; |
73 | 318k | } |
74 | 412 | dst += STRIPE_WIDTH; |
75 | 412 | } |
76 | 7 | size_t left = dst_stride - ((width + STRIPE_MASK) & ~STRIPE_MASK); |
77 | 3.26k | for (size_t y = 0; y < height; y++) { |
78 | 3.69k | for (size_t x = 0; x < left; x++) |
79 | 432 | dst[x] = 0; |
80 | 3.26k | dst += dst_stride; |
81 | 3.26k | } |
82 | 7 | } Line | Count | Source | 59 | 7 | { | 60 | 7 | ASSUME(!((uintptr_t) dst % ALIGNMENT) && !(dst_stride % ALIGNMENT)); | 61 | 7 | ASSUME(!((uintptr_t) src % ALIGNMENT)); | 62 | 7 | ASSUME(width > 0 && height > 0); | 63 | | | 64 | 419 | for (size_t x = 0; x < width; x += STRIPE_WIDTH) { | 65 | 412 | uint8_t *ptr = dst; | 66 | 319k | for (size_t y = 0; y < height; y++) { | 67 | 318k | const int16_t *dither = dither_line + 16 * (y & 1); | 68 | 2.86M | for (int k = 0; k < STRIPE_WIDTH; k++) | 69 | 2.54M | ptr[k] = (uint16_t) (src[k] - (src[k] >> 8) + dither[k]) >> 6; | 70 | | //ptr[k] = (255 * src[k] + 0x1FFF) / 0x4000; | 71 | 318k | ptr += dst_stride; | 72 | 318k | src += STRIPE_WIDTH; | 73 | 318k | } | 74 | 412 | dst += STRIPE_WIDTH; | 75 | 412 | } | 76 | 7 | size_t left = dst_stride - ((width + STRIPE_MASK) & ~STRIPE_MASK); | 77 | 3.26k | for (size_t y = 0; y < height; y++) { | 78 | 3.69k | for (size_t x = 0; x < left; x++) | 79 | 432 | dst[x] = 0; | 80 | 3.26k | dst += dst_stride; | 81 | 3.26k | } | 82 | 7 | } |
Unexecuted instantiation: ass_stripe_pack32_c |
83 | | |
84 | | /* |
85 | | * Contract Filters |
86 | | * |
87 | | * Contract image by factor 2 with kernel [1, 5, 10, 10, 5, 1]. |
88 | | */ |
89 | | |
90 | | void SUFFIX(ass_shrink_horz)(int16_t *restrict dst, const int16_t *restrict src, |
91 | | size_t src_width, size_t src_height) |
92 | 20 | { |
93 | 20 | ASSUME(!((uintptr_t) dst % ALIGNMENT)); |
94 | 20 | ASSUME(!((uintptr_t) src % ALIGNMENT)); |
95 | 20 | ASSUME(src_width > 0 && src_height > 0); |
96 | | |
97 | 20 | size_t dst_width = (src_width + 5) >> 1; |
98 | 20 | size_t size = ((src_width + STRIPE_MASK) & ~STRIPE_MASK) * src_height; |
99 | 20 | size_t step = STRIPE_WIDTH * src_height; |
100 | | |
101 | 20 | size_t offs = 0; |
102 | 20 | int16_t buf[3 * STRIPE_WIDTH]; |
103 | 20 | int16_t *ptr = buf + STRIPE_WIDTH; |
104 | 48 | for (size_t x = 0; x < dst_width; x += STRIPE_WIDTH) { |
105 | 168 | for (size_t y = 0; y < src_height; y++) { |
106 | 140 | copy_line(ptr - 1 * STRIPE_WIDTH, src, offs - 1 * step, size); |
107 | 140 | copy_line(ptr + 0 * STRIPE_WIDTH, src, offs + 0 * step, size); |
108 | 140 | copy_line(ptr + 1 * STRIPE_WIDTH, src, offs + 1 * step, size); |
109 | 1.26k | for (int k = 0; k < STRIPE_WIDTH; k++) |
110 | 1.12k | dst[k] = shrink_func(ptr[2 * k - 4], ptr[2 * k - 3], |
111 | 1.12k | ptr[2 * k - 2], ptr[2 * k - 1], |
112 | 1.12k | ptr[2 * k + 0], ptr[2 * k + 1]); |
113 | 140 | dst += STRIPE_WIDTH; |
114 | 140 | offs += STRIPE_WIDTH; |
115 | 140 | } |
116 | 28 | offs += step; |
117 | 28 | } |
118 | 20 | } Line | Count | Source | 92 | 20 | { | 93 | 20 | ASSUME(!((uintptr_t) dst % ALIGNMENT)); | 94 | 20 | ASSUME(!((uintptr_t) src % ALIGNMENT)); | 95 | 20 | ASSUME(src_width > 0 && src_height > 0); | 96 | | | 97 | 20 | size_t dst_width = (src_width + 5) >> 1; | 98 | 20 | size_t size = ((src_width + STRIPE_MASK) & ~STRIPE_MASK) * src_height; | 99 | 20 | size_t step = STRIPE_WIDTH * src_height; | 100 | | | 101 | 20 | size_t offs = 0; | 102 | 20 | int16_t buf[3 * STRIPE_WIDTH]; | 103 | 20 | int16_t *ptr = buf + STRIPE_WIDTH; | 104 | 48 | for (size_t x = 0; x < dst_width; x += STRIPE_WIDTH) { | 105 | 168 | for (size_t y = 0; y < src_height; y++) { | 106 | 140 | copy_line(ptr - 1 * STRIPE_WIDTH, src, offs - 1 * step, size); | 107 | 140 | copy_line(ptr + 0 * STRIPE_WIDTH, src, offs + 0 * step, size); | 108 | 140 | copy_line(ptr + 1 * STRIPE_WIDTH, src, offs + 1 * step, size); | 109 | 1.26k | for (int k = 0; k < STRIPE_WIDTH; k++) | 110 | 1.12k | dst[k] = shrink_func(ptr[2 * k - 4], ptr[2 * k - 3], | 111 | 1.12k | ptr[2 * k - 2], ptr[2 * k - 1], | 112 | 1.12k | ptr[2 * k + 0], ptr[2 * k + 1]); | 113 | 140 | dst += STRIPE_WIDTH; | 114 | 140 | offs += STRIPE_WIDTH; | 115 | 140 | } | 116 | 28 | offs += step; | 117 | 28 | } | 118 | 20 | } |
Unexecuted instantiation: ass_shrink_horz32_c |
119 | | |
120 | | void SUFFIX(ass_shrink_vert)(int16_t *restrict dst, const int16_t *restrict src, |
121 | | size_t src_width, size_t src_height) |
122 | 20 | { |
123 | 20 | ASSUME(!((uintptr_t) dst % ALIGNMENT)); |
124 | 20 | ASSUME(!((uintptr_t) src % ALIGNMENT)); |
125 | 20 | ASSUME(src_width > 0 && src_height > 0); |
126 | | |
127 | 20 | size_t dst_height = (src_height + 5) >> 1; |
128 | 20 | size_t step = STRIPE_WIDTH * src_height; |
129 | | |
130 | 80 | for (size_t x = 0; x < src_width; x += STRIPE_WIDTH) { |
131 | 60 | size_t offs = 0; |
132 | 456 | for (size_t y = 0; y < dst_height; y++) { |
133 | 396 | const int16_t *p1p = get_line(src, offs - 4 * STRIPE_WIDTH, step); |
134 | 396 | const int16_t *p1n = get_line(src, offs - 3 * STRIPE_WIDTH, step); |
135 | 396 | const int16_t *z0p = get_line(src, offs - 2 * STRIPE_WIDTH, step); |
136 | 396 | const int16_t *z0n = get_line(src, offs - 1 * STRIPE_WIDTH, step); |
137 | 396 | const int16_t *n1p = get_line(src, offs - 0 * STRIPE_WIDTH, step); |
138 | 396 | const int16_t *n1n = get_line(src, offs + 1 * STRIPE_WIDTH, step); |
139 | 3.56k | for (int k = 0; k < STRIPE_WIDTH; k++) |
140 | 3.16k | dst[k] = shrink_func(p1p[k], p1n[k], z0p[k], z0n[k], n1p[k], n1n[k]); |
141 | 396 | dst += 1 * STRIPE_WIDTH; |
142 | 396 | offs += 2 * STRIPE_WIDTH; |
143 | 396 | } |
144 | 60 | src += step; |
145 | 60 | } |
146 | 20 | } Line | Count | Source | 122 | 20 | { | 123 | 20 | ASSUME(!((uintptr_t) dst % ALIGNMENT)); | 124 | 20 | ASSUME(!((uintptr_t) src % ALIGNMENT)); | 125 | 20 | ASSUME(src_width > 0 && src_height > 0); | 126 | | | 127 | 20 | size_t dst_height = (src_height + 5) >> 1; | 128 | 20 | size_t step = STRIPE_WIDTH * src_height; | 129 | | | 130 | 80 | for (size_t x = 0; x < src_width; x += STRIPE_WIDTH) { | 131 | 60 | size_t offs = 0; | 132 | 456 | for (size_t y = 0; y < dst_height; y++) { | 133 | 396 | const int16_t *p1p = get_line(src, offs - 4 * STRIPE_WIDTH, step); | 134 | 396 | const int16_t *p1n = get_line(src, offs - 3 * STRIPE_WIDTH, step); | 135 | 396 | const int16_t *z0p = get_line(src, offs - 2 * STRIPE_WIDTH, step); | 136 | 396 | const int16_t *z0n = get_line(src, offs - 1 * STRIPE_WIDTH, step); | 137 | 396 | const int16_t *n1p = get_line(src, offs - 0 * STRIPE_WIDTH, step); | 138 | 396 | const int16_t *n1n = get_line(src, offs + 1 * STRIPE_WIDTH, step); | 139 | 3.56k | for (int k = 0; k < STRIPE_WIDTH; k++) | 140 | 3.16k | dst[k] = shrink_func(p1p[k], p1n[k], z0p[k], z0n[k], n1p[k], n1n[k]); | 141 | 396 | dst += 1 * STRIPE_WIDTH; | 142 | 396 | offs += 2 * STRIPE_WIDTH; | 143 | 396 | } | 144 | 60 | src += step; | 145 | 60 | } | 146 | 20 | } |
Unexecuted instantiation: ass_shrink_vert32_c |
147 | | |
148 | | /* |
149 | | * Expand Filters |
150 | | * |
151 | | * Expand image by factor 2 with kernel [5, 10, 1], [1, 10, 5]. |
152 | | */ |
153 | | |
154 | | void SUFFIX(ass_expand_horz)(int16_t *restrict dst, const int16_t *restrict src, |
155 | | size_t src_width, size_t src_height) |
156 | 20 | { |
157 | 20 | ASSUME(!((uintptr_t) dst % ALIGNMENT)); |
158 | 20 | ASSUME(!((uintptr_t) src % ALIGNMENT)); |
159 | 20 | ASSUME(src_width > 0 && src_height > 0); |
160 | | |
161 | 20 | size_t dst_width = 2 * src_width + 4; |
162 | 20 | size_t size = ((src_width + STRIPE_MASK) & ~STRIPE_MASK) * src_height; |
163 | 20 | size_t step = STRIPE_WIDTH * src_height; |
164 | | |
165 | 20 | size_t offs = 0; |
166 | 20 | int16_t buf[2 * STRIPE_WIDTH]; |
167 | 20 | int16_t *ptr = buf + STRIPE_WIDTH; |
168 | 404 | for (size_t x = STRIPE_WIDTH; x < dst_width; x += 2 * STRIPE_WIDTH) { |
169 | 8.44k | for (size_t y = 0; y < src_height; y++) { |
170 | 8.06k | copy_line(ptr - 1 * STRIPE_WIDTH, src, offs - 1 * step, size); |
171 | 8.06k | copy_line(ptr - 0 * STRIPE_WIDTH, src, offs - 0 * step, size); |
172 | 40.3k | for (int k = 0; k < STRIPE_WIDTH / 2; k++) |
173 | 32.2k | expand_func(&dst[2 * k], &dst[2 * k + 1], |
174 | 32.2k | ptr[k - 2], ptr[k - 1], ptr[k]); |
175 | 8.06k | int16_t *next = dst + step - STRIPE_WIDTH; |
176 | 40.3k | for (int k = STRIPE_WIDTH / 2; k < STRIPE_WIDTH; k++) |
177 | 32.2k | expand_func(&next[2 * k], &next[2 * k + 1], |
178 | 32.2k | ptr[k - 2], ptr[k - 1], ptr[k]); |
179 | 8.06k | dst += STRIPE_WIDTH; |
180 | 8.06k | offs += STRIPE_WIDTH; |
181 | 8.06k | } |
182 | 384 | dst += step; |
183 | 384 | } |
184 | 20 | if ((dst_width - 1) & STRIPE_WIDTH) |
185 | 16 | return; |
186 | | |
187 | 88 | for (size_t y = 0; y < src_height; y++) { |
188 | 84 | copy_line(ptr - 1 * STRIPE_WIDTH, src, offs - 1 * step, size); |
189 | 84 | copy_line(ptr - 0 * STRIPE_WIDTH, src, offs - 0 * step, size); |
190 | 420 | for (int k = 0; k < STRIPE_WIDTH / 2; k++) |
191 | 336 | expand_func(&dst[2 * k], &dst[2 * k + 1], |
192 | 336 | ptr[k - 2], ptr[k - 1], ptr[k]); |
193 | 84 | dst += STRIPE_WIDTH; |
194 | 84 | offs += STRIPE_WIDTH; |
195 | 84 | } |
196 | 4 | } Line | Count | Source | 156 | 20 | { | 157 | 20 | ASSUME(!((uintptr_t) dst % ALIGNMENT)); | 158 | 20 | ASSUME(!((uintptr_t) src % ALIGNMENT)); | 159 | 20 | ASSUME(src_width > 0 && src_height > 0); | 160 | | | 161 | 20 | size_t dst_width = 2 * src_width + 4; | 162 | 20 | size_t size = ((src_width + STRIPE_MASK) & ~STRIPE_MASK) * src_height; | 163 | 20 | size_t step = STRIPE_WIDTH * src_height; | 164 | | | 165 | 20 | size_t offs = 0; | 166 | 20 | int16_t buf[2 * STRIPE_WIDTH]; | 167 | 20 | int16_t *ptr = buf + STRIPE_WIDTH; | 168 | 404 | for (size_t x = STRIPE_WIDTH; x < dst_width; x += 2 * STRIPE_WIDTH) { | 169 | 8.44k | for (size_t y = 0; y < src_height; y++) { | 170 | 8.06k | copy_line(ptr - 1 * STRIPE_WIDTH, src, offs - 1 * step, size); | 171 | 8.06k | copy_line(ptr - 0 * STRIPE_WIDTH, src, offs - 0 * step, size); | 172 | 40.3k | for (int k = 0; k < STRIPE_WIDTH / 2; k++) | 173 | 32.2k | expand_func(&dst[2 * k], &dst[2 * k + 1], | 174 | 32.2k | ptr[k - 2], ptr[k - 1], ptr[k]); | 175 | 8.06k | int16_t *next = dst + step - STRIPE_WIDTH; | 176 | 40.3k | for (int k = STRIPE_WIDTH / 2; k < STRIPE_WIDTH; k++) | 177 | 32.2k | expand_func(&next[2 * k], &next[2 * k + 1], | 178 | 32.2k | ptr[k - 2], ptr[k - 1], ptr[k]); | 179 | 8.06k | dst += STRIPE_WIDTH; | 180 | 8.06k | offs += STRIPE_WIDTH; | 181 | 8.06k | } | 182 | 384 | dst += step; | 183 | 384 | } | 184 | 20 | if ((dst_width - 1) & STRIPE_WIDTH) | 185 | 16 | return; | 186 | | | 187 | 88 | for (size_t y = 0; y < src_height; y++) { | 188 | 84 | copy_line(ptr - 1 * STRIPE_WIDTH, src, offs - 1 * step, size); | 189 | 84 | copy_line(ptr - 0 * STRIPE_WIDTH, src, offs - 0 * step, size); | 190 | 420 | for (int k = 0; k < STRIPE_WIDTH / 2; k++) | 191 | 336 | expand_func(&dst[2 * k], &dst[2 * k + 1], | 192 | 336 | ptr[k - 2], ptr[k - 1], ptr[k]); | 193 | 84 | dst += STRIPE_WIDTH; | 194 | 84 | offs += STRIPE_WIDTH; | 195 | 84 | } | 196 | 4 | } |
Unexecuted instantiation: ass_expand_horz32_c |
197 | | |
198 | | void SUFFIX(ass_expand_vert)(int16_t *restrict dst, const int16_t *restrict src, |
199 | | size_t src_width, size_t src_height) |
200 | 20 | { |
201 | 20 | ASSUME(!((uintptr_t) dst % ALIGNMENT)); |
202 | 20 | ASSUME(!((uintptr_t) src % ALIGNMENT)); |
203 | 20 | ASSUME(src_width > 0 && src_height > 0); |
204 | | |
205 | 20 | size_t dst_height = 2 * src_height + 4; |
206 | 20 | size_t step = STRIPE_WIDTH * src_height; |
207 | | |
208 | 2.02k | for (size_t x = 0; x < src_width; x += STRIPE_WIDTH) { |
209 | 2.00k | size_t offs = 0; |
210 | 308k | for (size_t y = 0; y < dst_height; y += 2) { |
211 | 306k | const int16_t *p1 = get_line(src, offs - 2 * STRIPE_WIDTH, step); |
212 | 306k | const int16_t *z0 = get_line(src, offs - 1 * STRIPE_WIDTH, step); |
213 | 306k | const int16_t *n1 = get_line(src, offs - 0 * STRIPE_WIDTH, step); |
214 | 2.75M | for (int k = 0; k < STRIPE_WIDTH; k++) |
215 | 2.44M | expand_func(&dst[k], &dst[k + STRIPE_WIDTH], |
216 | 2.44M | p1[k], z0[k], n1[k]); |
217 | 306k | dst += 2 * STRIPE_WIDTH; |
218 | 306k | offs += 1 * STRIPE_WIDTH; |
219 | 306k | } |
220 | 2.00k | src += step; |
221 | 2.00k | } |
222 | 20 | } Line | Count | Source | 200 | 20 | { | 201 | 20 | ASSUME(!((uintptr_t) dst % ALIGNMENT)); | 202 | 20 | ASSUME(!((uintptr_t) src % ALIGNMENT)); | 203 | 20 | ASSUME(src_width > 0 && src_height > 0); | 204 | | | 205 | 20 | size_t dst_height = 2 * src_height + 4; | 206 | 20 | size_t step = STRIPE_WIDTH * src_height; | 207 | | | 208 | 2.02k | for (size_t x = 0; x < src_width; x += STRIPE_WIDTH) { | 209 | 2.00k | size_t offs = 0; | 210 | 308k | for (size_t y = 0; y < dst_height; y += 2) { | 211 | 306k | const int16_t *p1 = get_line(src, offs - 2 * STRIPE_WIDTH, step); | 212 | 306k | const int16_t *z0 = get_line(src, offs - 1 * STRIPE_WIDTH, step); | 213 | 306k | const int16_t *n1 = get_line(src, offs - 0 * STRIPE_WIDTH, step); | 214 | 2.75M | for (int k = 0; k < STRIPE_WIDTH; k++) | 215 | 2.44M | expand_func(&dst[k], &dst[k + STRIPE_WIDTH], | 216 | 2.44M | p1[k], z0[k], n1[k]); | 217 | 306k | dst += 2 * STRIPE_WIDTH; | 218 | 306k | offs += 1 * STRIPE_WIDTH; | 219 | 306k | } | 220 | 2.00k | src += step; | 221 | 2.00k | } | 222 | 20 | } |
Unexecuted instantiation: ass_expand_vert32_c |
223 | | |
224 | | /* |
225 | | * Main Parametric Filters |
226 | | * |
227 | | * Perform 1D convolution with kernel [..., c2, c1, c0, d, c0, c1, c2, ...], |
228 | | * cN = param[N], d = 1 - 2 * (c0 + c1 + c2 + ...), |
229 | | * number of parameters is part of the function name. |
230 | | */ |
231 | | |
232 | | static inline void SUFFIX(blur_horz)(int16_t *restrict dst, const int16_t *restrict src, |
233 | | size_t src_width, size_t src_height, |
234 | | const int16_t *restrict param, const int n) |
235 | 7 | { |
236 | 7 | ASSUME(!((uintptr_t) dst % ALIGNMENT)); |
237 | 7 | ASSUME(!((uintptr_t) src % ALIGNMENT)); |
238 | 7 | ASSUME(src_width > 0 && src_height > 0); |
239 | | |
240 | 7 | size_t dst_width = src_width + 2 * n; |
241 | 7 | size_t size = ((src_width + STRIPE_MASK) & ~STRIPE_MASK) * src_height; |
242 | 7 | size_t step = STRIPE_WIDTH * src_height; |
243 | | |
244 | 7 | size_t offs = 0; |
245 | 7 | int16_t buf[3 * STRIPE_WIDTH]; |
246 | 7 | int16_t *ptr = buf + 2 * STRIPE_WIDTH; |
247 | 31 | for (size_t x = 0; x < dst_width; x += STRIPE_WIDTH) { |
248 | 296 | for (size_t y = 0; y < src_height; y++) { |
249 | 976 | for (int i = -((2 * n + STRIPE_WIDTH - 1u) / STRIPE_WIDTH); i <= 0; i++) |
250 | 704 | copy_line(ptr + i * STRIPE_WIDTH, src, offs + i * step, size); |
251 | 272 | int32_t acc[STRIPE_WIDTH]; |
252 | 2.44k | for (int k = 0; k < STRIPE_WIDTH; k++) |
253 | 2.17k | acc[k] = 0x8000; |
254 | 1.70k | for (int i = n; i > 0; i--) |
255 | 12.8k | for (int k = 0; k < STRIPE_WIDTH; k++) |
256 | 11.4k | acc[k] += (int16_t) (ptr[k - n - i] - ptr[k - n]) * param[i - 1] + |
257 | 11.4k | (int16_t) (ptr[k - n + i] - ptr[k - n]) * param[i - 1]; |
258 | 2.44k | for (int k = 0; k < STRIPE_WIDTH; k++) |
259 | 2.17k | dst[k] = ptr[k - n] + (acc[k] >> 16); |
260 | | |
261 | 272 | dst += STRIPE_WIDTH; |
262 | 272 | offs += STRIPE_WIDTH; |
263 | 272 | } |
264 | 24 | } |
265 | 7 | } Line | Count | Source | 235 | 7 | { | 236 | 7 | ASSUME(!((uintptr_t) dst % ALIGNMENT)); | 237 | 7 | ASSUME(!((uintptr_t) src % ALIGNMENT)); | 238 | 7 | ASSUME(src_width > 0 && src_height > 0); | 239 | | | 240 | 7 | size_t dst_width = src_width + 2 * n; | 241 | 7 | size_t size = ((src_width + STRIPE_MASK) & ~STRIPE_MASK) * src_height; | 242 | 7 | size_t step = STRIPE_WIDTH * src_height; | 243 | | | 244 | 7 | size_t offs = 0; | 245 | 7 | int16_t buf[3 * STRIPE_WIDTH]; | 246 | 7 | int16_t *ptr = buf + 2 * STRIPE_WIDTH; | 247 | 31 | for (size_t x = 0; x < dst_width; x += STRIPE_WIDTH) { | 248 | 296 | for (size_t y = 0; y < src_height; y++) { | 249 | 976 | for (int i = -((2 * n + STRIPE_WIDTH - 1u) / STRIPE_WIDTH); i <= 0; i++) | 250 | 704 | copy_line(ptr + i * STRIPE_WIDTH, src, offs + i * step, size); | 251 | 272 | int32_t acc[STRIPE_WIDTH]; | 252 | 2.44k | for (int k = 0; k < STRIPE_WIDTH; k++) | 253 | 2.17k | acc[k] = 0x8000; | 254 | 1.70k | for (int i = n; i > 0; i--) | 255 | 12.8k | for (int k = 0; k < STRIPE_WIDTH; k++) | 256 | 11.4k | acc[k] += (int16_t) (ptr[k - n - i] - ptr[k - n]) * param[i - 1] + | 257 | 11.4k | (int16_t) (ptr[k - n + i] - ptr[k - n]) * param[i - 1]; | 258 | 2.44k | for (int k = 0; k < STRIPE_WIDTH; k++) | 259 | 2.17k | dst[k] = ptr[k - n] + (acc[k] >> 16); | 260 | | | 261 | 272 | dst += STRIPE_WIDTH; | 262 | 272 | offs += STRIPE_WIDTH; | 263 | 272 | } | 264 | 24 | } | 265 | 7 | } |
Unexecuted instantiation: c_blur.c:blur_horz32_c |
266 | | |
267 | | static inline void SUFFIX(blur_vert)(int16_t *restrict dst, const int16_t *restrict src, |
268 | | size_t src_width, size_t src_height, |
269 | | const int16_t *restrict param, const int n) |
270 | 7 | { |
271 | 7 | ASSUME(!((uintptr_t) dst % ALIGNMENT)); |
272 | 7 | ASSUME(!((uintptr_t) src % ALIGNMENT)); |
273 | 7 | ASSUME(src_width > 0 && src_height > 0); |
274 | | |
275 | 7 | size_t dst_height = src_height + 2 * n; |
276 | 7 | size_t step = STRIPE_WIDTH * src_height; |
277 | | |
278 | 31 | for (size_t x = 0; x < src_width; x += STRIPE_WIDTH) { |
279 | 24 | size_t offs = 0; |
280 | 594 | for (size_t y = 0; y < dst_height; y++) { |
281 | 570 | int32_t acc[STRIPE_WIDTH]; |
282 | 5.13k | for (int k = 0; k < STRIPE_WIDTH; k++) |
283 | 4.56k | acc[k] = 0x8000; |
284 | 570 | const int16_t *center = get_line(src, offs - n * STRIPE_WIDTH, step); |
285 | 4.00k | for (int i = n; i > 0; i--) { |
286 | 3.43k | const int16_t *line1 = get_line(src, offs - (n + i) * STRIPE_WIDTH, step); |
287 | 3.43k | const int16_t *line2 = get_line(src, offs - (n - i) * STRIPE_WIDTH, step); |
288 | 30.9k | for (int k = 0; k < STRIPE_WIDTH; k++) |
289 | 27.5k | acc[k] += (int16_t) (line1[k] - center[k]) * param[i - 1] + |
290 | 27.5k | (int16_t) (line2[k] - center[k]) * param[i - 1]; |
291 | 3.43k | } |
292 | 5.13k | for (int k = 0; k < STRIPE_WIDTH; k++) |
293 | 4.56k | dst[k] = center[k] + (acc[k] >> 16); |
294 | | |
295 | 570 | dst += STRIPE_WIDTH; |
296 | 570 | offs += STRIPE_WIDTH; |
297 | 570 | } |
298 | 24 | src += step; |
299 | 24 | } |
300 | 7 | } Line | Count | Source | 270 | 7 | { | 271 | 7 | ASSUME(!((uintptr_t) dst % ALIGNMENT)); | 272 | 7 | ASSUME(!((uintptr_t) src % ALIGNMENT)); | 273 | 7 | ASSUME(src_width > 0 && src_height > 0); | 274 | | | 275 | 7 | size_t dst_height = src_height + 2 * n; | 276 | 7 | size_t step = STRIPE_WIDTH * src_height; | 277 | | | 278 | 31 | for (size_t x = 0; x < src_width; x += STRIPE_WIDTH) { | 279 | 24 | size_t offs = 0; | 280 | 594 | for (size_t y = 0; y < dst_height; y++) { | 281 | 570 | int32_t acc[STRIPE_WIDTH]; | 282 | 5.13k | for (int k = 0; k < STRIPE_WIDTH; k++) | 283 | 4.56k | acc[k] = 0x8000; | 284 | 570 | const int16_t *center = get_line(src, offs - n * STRIPE_WIDTH, step); | 285 | 4.00k | for (int i = n; i > 0; i--) { | 286 | 3.43k | const int16_t *line1 = get_line(src, offs - (n + i) * STRIPE_WIDTH, step); | 287 | 3.43k | const int16_t *line2 = get_line(src, offs - (n - i) * STRIPE_WIDTH, step); | 288 | 30.9k | for (int k = 0; k < STRIPE_WIDTH; k++) | 289 | 27.5k | acc[k] += (int16_t) (line1[k] - center[k]) * param[i - 1] + | 290 | 27.5k | (int16_t) (line2[k] - center[k]) * param[i - 1]; | 291 | 3.43k | } | 292 | 5.13k | for (int k = 0; k < STRIPE_WIDTH; k++) | 293 | 4.56k | dst[k] = center[k] + (acc[k] >> 16); | 294 | | | 295 | 570 | dst += STRIPE_WIDTH; | 296 | 570 | offs += STRIPE_WIDTH; | 297 | 570 | } | 298 | 24 | src += step; | 299 | 24 | } | 300 | 7 | } |
Unexecuted instantiation: c_blur.c:blur_vert32_c |
301 | | |
302 | | void SUFFIX(ass_blur4_horz)(int16_t *restrict dst, const int16_t *restrict src, |
303 | | size_t src_width, size_t src_height, |
304 | | const int16_t *restrict param) |
305 | 2 | { |
306 | 2 | SUFFIX(blur_horz)(dst, src, src_width, src_height, param, 4); |
307 | 2 | } Line | Count | Source | 305 | 2 | { | 306 | 2 | SUFFIX(blur_horz)(dst, src, src_width, src_height, param, 4); | 307 | 2 | } |
Unexecuted instantiation: ass_blur4_horz32_c |
308 | | |
309 | | void SUFFIX(ass_blur4_vert)(int16_t *restrict dst, const int16_t *restrict src, |
310 | | size_t src_width, size_t src_height, |
311 | | const int16_t *restrict param) |
312 | 2 | { |
313 | 2 | SUFFIX(blur_vert)(dst, src, src_width, src_height, param, 4); |
314 | 2 | } Line | Count | Source | 312 | 2 | { | 313 | 2 | SUFFIX(blur_vert)(dst, src, src_width, src_height, param, 4); | 314 | 2 | } |
Unexecuted instantiation: ass_blur4_vert32_c |
315 | | |
316 | | void SUFFIX(ass_blur5_horz)(int16_t *restrict dst, const int16_t *restrict src, |
317 | | size_t src_width, size_t src_height, |
318 | | const int16_t *restrict param) |
319 | 1 | { |
320 | 1 | SUFFIX(blur_horz)(dst, src, src_width, src_height, param, 5); |
321 | 1 | } Line | Count | Source | 319 | 1 | { | 320 | 1 | SUFFIX(blur_horz)(dst, src, src_width, src_height, param, 5); | 321 | 1 | } |
Unexecuted instantiation: ass_blur5_horz32_c |
322 | | |
323 | | void SUFFIX(ass_blur5_vert)(int16_t *dst, const int16_t *src, |
324 | | size_t src_width, size_t src_height, |
325 | | const int16_t *param) |
326 | 1 | { |
327 | 1 | SUFFIX(blur_vert)(dst, src, src_width, src_height, param, 5); |
328 | 1 | } Line | Count | Source | 326 | 1 | { | 327 | 1 | SUFFIX(blur_vert)(dst, src, src_width, src_height, param, 5); | 328 | 1 | } |
Unexecuted instantiation: ass_blur5_vert32_c |
329 | | |
330 | | void SUFFIX(ass_blur6_horz)(int16_t *dst, const int16_t *src, |
331 | | size_t src_width, size_t src_height, |
332 | | const int16_t *param) |
333 | 0 | { |
334 | 0 | SUFFIX(blur_horz)(dst, src, src_width, src_height, param, 6); |
335 | 0 | } Unexecuted instantiation: ass_blur6_horz16_c Unexecuted instantiation: ass_blur6_horz32_c |
336 | | |
337 | | void SUFFIX(ass_blur6_vert)(int16_t *dst, const int16_t *src, |
338 | | size_t src_width, size_t src_height, |
339 | | const int16_t *param) |
340 | 0 | { |
341 | 0 | SUFFIX(blur_vert)(dst, src, src_width, src_height, param, 6); |
342 | 0 | } Unexecuted instantiation: ass_blur6_vert16_c Unexecuted instantiation: ass_blur6_vert32_c |
343 | | |
344 | | void SUFFIX(ass_blur7_horz)(int16_t *dst, const int16_t *src, |
345 | | size_t src_width, size_t src_height, |
346 | | const int16_t *param) |
347 | 0 | { |
348 | 0 | SUFFIX(blur_horz)(dst, src, src_width, src_height, param, 7); |
349 | 0 | } Unexecuted instantiation: ass_blur7_horz16_c Unexecuted instantiation: ass_blur7_horz32_c |
350 | | |
351 | | void SUFFIX(ass_blur7_vert)(int16_t *dst, const int16_t *src, |
352 | | size_t src_width, size_t src_height, |
353 | | const int16_t *param) |
354 | 0 | { |
355 | 0 | SUFFIX(blur_vert)(dst, src, src_width, src_height, param, 7); |
356 | 0 | } Unexecuted instantiation: ass_blur7_vert16_c Unexecuted instantiation: ass_blur7_vert32_c |
357 | | |
358 | | void SUFFIX(ass_blur8_horz)(int16_t *dst, const int16_t *src, |
359 | | size_t src_width, size_t src_height, |
360 | | const int16_t *param) |
361 | 4 | { |
362 | 4 | SUFFIX(blur_horz)(dst, src, src_width, src_height, param, 8); |
363 | 4 | } Line | Count | Source | 361 | 4 | { | 362 | 4 | SUFFIX(blur_horz)(dst, src, src_width, src_height, param, 8); | 363 | 4 | } |
Unexecuted instantiation: ass_blur8_horz32_c |
364 | | |
365 | | void SUFFIX(ass_blur8_vert)(int16_t *dst, const int16_t *src, |
366 | | size_t src_width, size_t src_height, |
367 | | const int16_t *param) |
368 | 4 | { |
369 | 4 | SUFFIX(blur_vert)(dst, src, src_width, src_height, param, 8); |
370 | 4 | } Line | Count | Source | 368 | 4 | { | 369 | 4 | SUFFIX(blur_vert)(dst, src, src_width, src_height, param, 8); | 370 | 4 | } |
Unexecuted instantiation: ass_blur8_vert32_c |
371 | | |
372 | | |
/*
 * Remove the macros this template defined for itself.
 * NOTE(review): presumably so the header can be included again with a
 * different ALIGNMENT/SUFFIX pair - confirm against the including .c file.
 */
#undef STRIPE_WIDTH
#undef STRIPE_MASK
#undef copy_line