/src/dav1d/src/filmgrain_tmpl.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright © 2018, Niklas Haas |
3 | | * Copyright © 2018, VideoLAN and dav1d authors |
4 | | * Copyright © 2018, Two Orioles, LLC |
5 | | * All rights reserved. |
6 | | * |
7 | | * Redistribution and use in source and binary forms, with or without |
8 | | * modification, are permitted provided that the following conditions are met: |
9 | | * |
10 | | * 1. Redistributions of source code must retain the above copyright notice, this |
11 | | * list of conditions and the following disclaimer. |
12 | | * |
13 | | * 2. Redistributions in binary form must reproduce the above copyright notice, |
14 | | * this list of conditions and the following disclaimer in the documentation |
15 | | * and/or other materials provided with the distribution. |
16 | | * |
17 | | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
18 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
19 | | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
20 | | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR |
21 | | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
22 | | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
23 | | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
24 | | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
25 | | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
26 | | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
27 | | */ |
28 | | |
29 | | #include "common/attributes.h" |
30 | | #include "common/intops.h" |
31 | | |
32 | | #include "src/filmgrain.h" |
33 | | #include "src/tables.h" |
34 | | |
35 | 0 | #define SUB_GRAIN_WIDTH 44 |
36 | 0 | #define SUB_GRAIN_HEIGHT 38 |
37 | | |
/* Steps the shift register stored in *state once and returns the `bits`
 * most significant bits of the updated value, treating the register as
 * 16 bits wide (the feedback bit is inserted at bit 15 and the result is
 * taken relative to bit 16). */
static inline int get_random_number(const int bits, unsigned *const state) {
    const int prev = *state;
    // feedback bit: XOR of register bits 0, 1, 3 and 12
    const unsigned feedback = (prev ^ (prev >> 1) ^ (prev >> 3) ^ (prev >> 12)) & 1;
    const unsigned next = (prev >> 1) | (feedback << 15);
    const unsigned mask = (1 << bits) - 1;

    *state = next;
    return (next >> (16 - bits)) & mask;
}
45 | | |
/* Returns x shifted right by `shift` bits after adding half of
 * (1 << shift) as a rounding bias; with shift == 0 the bias is 0 and x is
 * returned unchanged. */
static inline int round2(const int x, const uint64_t shift) {
    const int half = (1 << shift) >> 1;
    const int biased = x + half;
    return biased >> shift;
}
49 | | |
50 | | static void generate_grain_y_c(entry buf[][GRAIN_WIDTH], |
51 | | const Dav1dFilmGrainData *const data |
52 | | HIGHBD_DECL_SUFFIX) |
53 | 0 | { |
54 | 0 | const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8; |
55 | 0 | unsigned seed = data->seed; |
56 | 0 | const int shift = 4 - bitdepth_min_8 + data->grain_scale_shift; |
57 | 0 | const int grain_ctr = 128 << bitdepth_min_8; |
58 | 0 | const int grain_min = -grain_ctr, grain_max = grain_ctr - 1; |
59 | |
|
60 | 0 | for (int y = 0; y < GRAIN_HEIGHT; y++) { |
61 | 0 | for (int x = 0; x < GRAIN_WIDTH; x++) { |
62 | 0 | const int value = get_random_number(11, &seed); |
63 | 0 | buf[y][x] = round2(dav1d_gaussian_sequence[ value ], shift); |
64 | 0 | } |
65 | 0 | } |
66 | |
|
67 | 0 | const int ar_pad = 3; |
68 | 0 | const int ar_lag = data->ar_coeff_lag; |
69 | |
|
70 | 0 | for (int y = ar_pad; y < GRAIN_HEIGHT; y++) { |
71 | 0 | for (int x = ar_pad; x < GRAIN_WIDTH - ar_pad; x++) { |
72 | 0 | const int8_t *coeff = data->ar_coeffs_y; |
73 | 0 | int sum = 0; |
74 | 0 | for (int dy = -ar_lag; dy <= 0; dy++) { |
75 | 0 | for (int dx = -ar_lag; dx <= ar_lag; dx++) { |
76 | 0 | if (!dx && !dy) |
77 | 0 | break; |
78 | 0 | sum += *(coeff++) * buf[y + dy][x + dx]; |
79 | 0 | } |
80 | 0 | } |
81 | |
|
82 | 0 | const int grain = buf[y][x] + round2(sum, data->ar_coeff_shift); |
83 | 0 | buf[y][x] = iclip(grain, grain_min, grain_max); |
84 | 0 | } |
85 | 0 | } |
86 | 0 | } |
87 | | |
88 | | static NOINLINE void |
89 | | generate_grain_uv_c(entry buf[][GRAIN_WIDTH], |
90 | | const entry buf_y[][GRAIN_WIDTH], |
91 | | const Dav1dFilmGrainData *const data, const intptr_t uv, |
92 | | const int subx, const int suby HIGHBD_DECL_SUFFIX) |
93 | 0 | { |
94 | 0 | const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8; |
95 | 0 | unsigned seed = data->seed ^ (uv ? 0x49d8 : 0xb524); |
96 | 0 | const int shift = 4 - bitdepth_min_8 + data->grain_scale_shift; |
97 | 0 | const int grain_ctr = 128 << bitdepth_min_8; |
98 | 0 | const int grain_min = -grain_ctr, grain_max = grain_ctr - 1; |
99 | |
|
100 | 0 | const int chromaW = subx ? SUB_GRAIN_WIDTH : GRAIN_WIDTH; |
101 | 0 | const int chromaH = suby ? SUB_GRAIN_HEIGHT : GRAIN_HEIGHT; |
102 | |
|
103 | 0 | for (int y = 0; y < chromaH; y++) { |
104 | 0 | for (int x = 0; x < chromaW; x++) { |
105 | 0 | const int value = get_random_number(11, &seed); |
106 | 0 | buf[y][x] = round2(dav1d_gaussian_sequence[ value ], shift); |
107 | 0 | } |
108 | 0 | } |
109 | |
|
110 | 0 | const int ar_pad = 3; |
111 | 0 | const int ar_lag = data->ar_coeff_lag; |
112 | |
|
113 | 0 | for (int y = ar_pad; y < chromaH; y++) { |
114 | 0 | for (int x = ar_pad; x < chromaW - ar_pad; x++) { |
115 | 0 | const int8_t *coeff = data->ar_coeffs_uv[uv]; |
116 | 0 | int sum = 0; |
117 | 0 | for (int dy = -ar_lag; dy <= 0; dy++) { |
118 | 0 | for (int dx = -ar_lag; dx <= ar_lag; dx++) { |
119 | | // For the final (current) pixel, we need to add in the |
120 | | // contribution from the luma grain texture |
121 | 0 | if (!dx && !dy) { |
122 | 0 | if (!data->num_y_points) |
123 | 0 | break; |
124 | 0 | int luma = 0; |
125 | 0 | const int lumaX = ((x - ar_pad) << subx) + ar_pad; |
126 | 0 | const int lumaY = ((y - ar_pad) << suby) + ar_pad; |
127 | 0 | for (int i = 0; i <= suby; i++) { |
128 | 0 | for (int j = 0; j <= subx; j++) { |
129 | 0 | luma += buf_y[lumaY + i][lumaX + j]; |
130 | 0 | } |
131 | 0 | } |
132 | 0 | luma = round2(luma, subx + suby); |
133 | 0 | sum += luma * (*coeff); |
134 | 0 | break; |
135 | 0 | } |
136 | | |
137 | 0 | sum += *(coeff++) * buf[y + dy][x + dx]; |
138 | 0 | } |
139 | 0 | } |
140 | |
|
141 | 0 | const int grain = buf[y][x] + round2(sum, data->ar_coeff_shift); |
142 | 0 | buf[y][x] = iclip(grain, grain_min, grain_max); |
143 | 0 | } |
144 | 0 | } |
145 | 0 | } |
146 | | |
147 | | #define gnuv_ss_fn(nm, ss_x, ss_y) \ |
148 | 0 | static decl_generate_grain_uv_fn(generate_grain_uv_##nm##_c) { \ |
149 | 0 | generate_grain_uv_c(buf, buf_y, data, uv, ss_x, ss_y HIGHBD_TAIL_SUFFIX); \ |
150 | 0 | } Unexecuted instantiation: filmgrain_tmpl.c:generate_grain_uv_420_c Unexecuted instantiation: filmgrain_tmpl.c:generate_grain_uv_422_c Unexecuted instantiation: filmgrain_tmpl.c:generate_grain_uv_444_c |
151 | | |
152 | | gnuv_ss_fn(420, 1, 1); |
153 | | gnuv_ss_fn(422, 1, 0); |
154 | | gnuv_ss_fn(444, 0, 0); |
155 | | |
156 | | // samples from the correct block of a grain LUT, while taking into account the |
157 | | // offsets provided by the offsets cache |
158 | | static inline entry sample_lut(const entry grain_lut[][GRAIN_WIDTH], |
159 | | const int offsets[2][2], const int subx, const int suby, |
160 | | const int bx, const int by, const int x, const int y) |
161 | 0 | { |
162 | 0 | const int randval = offsets[bx][by]; |
163 | 0 | const int offx = 3 + (2 >> subx) * (3 + (randval >> 4)); |
164 | 0 | const int offy = 3 + (2 >> suby) * (3 + (randval & 0xF)); |
165 | 0 | return grain_lut[offy + y + (FG_BLOCK_SIZE >> suby) * by] |
166 | 0 | [offx + x + (FG_BLOCK_SIZE >> subx) * bx]; |
167 | 0 | } |
168 | | |
169 | | static void fgy_32x32xn_c(pixel *const dst_row, const pixel *const src_row, |
170 | | const ptrdiff_t stride, |
171 | | const Dav1dFilmGrainData *const data, const size_t pw, |
172 | | const uint8_t scaling[SCALING_SIZE], |
173 | | const entry grain_lut[][GRAIN_WIDTH], |
174 | | const int bh, const int row_num HIGHBD_DECL_SUFFIX) |
175 | 0 | { |
176 | 0 | const int rows = 1 + (data->overlap_flag && row_num > 0); |
177 | 0 | const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8; |
178 | 0 | const int grain_ctr = 128 << bitdepth_min_8; |
179 | 0 | const int grain_min = -grain_ctr, grain_max = grain_ctr - 1; |
180 | |
|
181 | 0 | int min_value, max_value; |
182 | 0 | if (data->clip_to_restricted_range) { |
183 | 0 | min_value = 16 << bitdepth_min_8; |
184 | 0 | max_value = 235 << bitdepth_min_8; |
185 | 0 | } else { |
186 | 0 | min_value = 0; |
187 | 0 | max_value = BITDEPTH_MAX; |
188 | 0 | } |
189 | | |
190 | | // seed[0] contains the current row, seed[1] contains the previous |
191 | 0 | unsigned seed[2]; |
192 | 0 | for (int i = 0; i < rows; i++) { |
193 | 0 | seed[i] = data->seed; |
194 | 0 | seed[i] ^= (((row_num - i) * 37 + 178) & 0xFF) << 8; |
195 | 0 | seed[i] ^= (((row_num - i) * 173 + 105) & 0xFF); |
196 | 0 | } |
197 | |
|
198 | 0 | assert(stride % (FG_BLOCK_SIZE * sizeof(pixel)) == 0); |
199 | | |
200 | 0 | int offsets[2 /* col offset */][2 /* row offset */]; |
201 | | |
202 | | // process this row in FG_BLOCK_SIZE^2 blocks |
203 | 0 | for (unsigned bx = 0; bx < pw; bx += FG_BLOCK_SIZE) { |
204 | 0 | const int bw = imin(FG_BLOCK_SIZE, (int) pw - bx); |
205 | |
|
206 | 0 | if (data->overlap_flag && bx) { |
207 | | // shift previous offsets left |
208 | 0 | for (int i = 0; i < rows; i++) |
209 | 0 | offsets[1][i] = offsets[0][i]; |
210 | 0 | } |
211 | | |
212 | | // update current offsets |
213 | 0 | for (int i = 0; i < rows; i++) |
214 | 0 | offsets[0][i] = get_random_number(8, &seed[i]); |
215 | | |
216 | | // x/y block offsets to compensate for overlapped regions |
217 | 0 | const int ystart = data->overlap_flag && row_num ? imin(2, bh) : 0; |
218 | 0 | const int xstart = data->overlap_flag && bx ? imin(2, bw) : 0; |
219 | |
|
220 | 0 | static const int w[2][2] = { { 27, 17 }, { 17, 27 } }; |
221 | |
|
222 | 0 | #define add_noise_y(x, y, grain) \ |
223 | 0 | const pixel *const src = src_row + (y) * PXSTRIDE(stride) + (x) + bx; \ |
224 | 0 | pixel *const dst = dst_row + (y) * PXSTRIDE(stride) + (x) + bx; \ |
225 | 0 | const int noise = round2(scaling[ *src ] * (grain), data->scaling_shift); \ |
226 | 0 | *dst = iclip(*src + noise, min_value, max_value); |
227 | |
|
228 | 0 | for (int y = ystart; y < bh; y++) { |
229 | | // Non-overlapped image region (straightforward) |
230 | 0 | for (int x = xstart; x < bw; x++) { |
231 | 0 | int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y); |
232 | 0 | add_noise_y(x, y, grain); |
233 | 0 | } |
234 | | |
235 | | // Special case for overlapped column |
236 | 0 | for (int x = 0; x < xstart; x++) { |
237 | 0 | int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y); |
238 | 0 | int old = sample_lut(grain_lut, offsets, 0, 0, 1, 0, x, y); |
239 | 0 | grain = round2(old * w[x][0] + grain * w[x][1], 5); |
240 | 0 | grain = iclip(grain, grain_min, grain_max); |
241 | 0 | add_noise_y(x, y, grain); |
242 | 0 | } |
243 | 0 | } |
244 | |
|
245 | 0 | for (int y = 0; y < ystart; y++) { |
246 | | // Special case for overlapped row (sans corner) |
247 | 0 | for (int x = xstart; x < bw; x++) { |
248 | 0 | int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y); |
249 | 0 | int old = sample_lut(grain_lut, offsets, 0, 0, 0, 1, x, y); |
250 | 0 | grain = round2(old * w[y][0] + grain * w[y][1], 5); |
251 | 0 | grain = iclip(grain, grain_min, grain_max); |
252 | 0 | add_noise_y(x, y, grain); |
253 | 0 | } |
254 | | |
255 | | // Special case for doubly-overlapped corner |
256 | 0 | for (int x = 0; x < xstart; x++) { |
257 | | // Blend the top pixel with the top left block |
258 | 0 | int top = sample_lut(grain_lut, offsets, 0, 0, 0, 1, x, y); |
259 | 0 | int old = sample_lut(grain_lut, offsets, 0, 0, 1, 1, x, y); |
260 | 0 | top = round2(old * w[x][0] + top * w[x][1], 5); |
261 | 0 | top = iclip(top, grain_min, grain_max); |
262 | | |
263 | | // Blend the current pixel with the left block |
264 | 0 | int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y); |
265 | 0 | old = sample_lut(grain_lut, offsets, 0, 0, 1, 0, x, y); |
266 | 0 | grain = round2(old * w[x][0] + grain * w[x][1], 5); |
267 | 0 | grain = iclip(grain, grain_min, grain_max); |
268 | | |
269 | | // Mix the row rows together and apply grain |
270 | 0 | grain = round2(top * w[y][0] + grain * w[y][1], 5); |
271 | 0 | grain = iclip(grain, grain_min, grain_max); |
272 | 0 | add_noise_y(x, y, grain); |
273 | 0 | } |
274 | 0 | } |
275 | 0 | } |
276 | 0 | } |
277 | | |
278 | | static NOINLINE void |
279 | | fguv_32x32xn_c(pixel *const dst_row, const pixel *const src_row, |
280 | | const ptrdiff_t stride, const Dav1dFilmGrainData *const data, |
281 | | const size_t pw, const uint8_t scaling[SCALING_SIZE], |
282 | | const entry grain_lut[][GRAIN_WIDTH], const int bh, |
283 | | const int row_num, const pixel *const luma_row, |
284 | | const ptrdiff_t luma_stride, const int uv, const int is_id, |
285 | | const int sx, const int sy HIGHBD_DECL_SUFFIX) |
286 | 0 | { |
287 | 0 | const int rows = 1 + (data->overlap_flag && row_num > 0); |
288 | 0 | const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8; |
289 | 0 | const int grain_ctr = 128 << bitdepth_min_8; |
290 | 0 | const int grain_min = -grain_ctr, grain_max = grain_ctr - 1; |
291 | |
|
292 | 0 | int min_value, max_value; |
293 | 0 | if (data->clip_to_restricted_range) { |
294 | 0 | min_value = 16 << bitdepth_min_8; |
295 | 0 | max_value = (is_id ? 235 : 240) << bitdepth_min_8; |
296 | 0 | } else { |
297 | 0 | min_value = 0; |
298 | 0 | max_value = BITDEPTH_MAX; |
299 | 0 | } |
300 | | |
301 | | // seed[0] contains the current row, seed[1] contains the previous |
302 | 0 | unsigned seed[2]; |
303 | 0 | for (int i = 0; i < rows; i++) { |
304 | 0 | seed[i] = data->seed; |
305 | 0 | seed[i] ^= (((row_num - i) * 37 + 178) & 0xFF) << 8; |
306 | 0 | seed[i] ^= (((row_num - i) * 173 + 105) & 0xFF); |
307 | 0 | } |
308 | |
|
309 | 0 | assert(stride % (FG_BLOCK_SIZE * sizeof(pixel)) == 0); |
310 | | |
311 | 0 | int offsets[2 /* col offset */][2 /* row offset */]; |
312 | | |
313 | | // process this row in FG_BLOCK_SIZE^2 blocks (subsampled) |
314 | 0 | for (unsigned bx = 0; bx < pw; bx += FG_BLOCK_SIZE >> sx) { |
315 | 0 | const int bw = imin(FG_BLOCK_SIZE >> sx, (int)(pw - bx)); |
316 | 0 | if (data->overlap_flag && bx) { |
317 | | // shift previous offsets left |
318 | 0 | for (int i = 0; i < rows; i++) |
319 | 0 | offsets[1][i] = offsets[0][i]; |
320 | 0 | } |
321 | | |
322 | | // update current offsets |
323 | 0 | for (int i = 0; i < rows; i++) |
324 | 0 | offsets[0][i] = get_random_number(8, &seed[i]); |
325 | | |
326 | | // x/y block offsets to compensate for overlapped regions |
327 | 0 | const int ystart = data->overlap_flag && row_num ? imin(2 >> sy, bh) : 0; |
328 | 0 | const int xstart = data->overlap_flag && bx ? imin(2 >> sx, bw) : 0; |
329 | |
|
330 | 0 | static const int w[2 /* sub */][2 /* off */][2] = { |
331 | 0 | { { 27, 17 }, { 17, 27 } }, |
332 | 0 | { { 23, 22 } }, |
333 | 0 | }; |
334 | |
|
335 | 0 | #define add_noise_uv(x, y, grain) \ |
336 | 0 | const int lx = (bx + x) << sx; \ |
337 | 0 | const int ly = y << sy; \ |
338 | 0 | const pixel *const luma = luma_row + ly * PXSTRIDE(luma_stride) + lx; \ |
339 | 0 | pixel avg = luma[0]; \ |
340 | 0 | if (sx) \ |
341 | 0 | avg = (avg + luma[1] + 1) >> 1; \ |
342 | 0 | const pixel *const src = src_row + (y) * PXSTRIDE(stride) + (bx + (x)); \ |
343 | 0 | pixel *const dst = dst_row + (y) * PXSTRIDE(stride) + (bx + (x)); \ |
344 | 0 | int val = avg; \ |
345 | 0 | if (!data->chroma_scaling_from_luma) { \ |
346 | 0 | const int combined = avg * data->uv_luma_mult[uv] + \ |
347 | 0 | *src * data->uv_mult[uv]; \ |
348 | 0 | val = iclip_pixel( (combined >> 6) + \ |
349 | 0 | (data->uv_offset[uv] * (1 << bitdepth_min_8)) ); \ |
350 | 0 | } \ |
351 | 0 | const int noise = round2(scaling[ val ] * (grain), data->scaling_shift); \ |
352 | 0 | *dst = iclip(*src + noise, min_value, max_value); |
353 | |
|
354 | 0 | for (int y = ystart; y < bh; y++) { |
355 | | // Non-overlapped image region (straightforward) |
356 | 0 | for (int x = xstart; x < bw; x++) { |
357 | 0 | int grain = sample_lut(grain_lut, offsets, sx, sy, 0, 0, x, y); |
358 | 0 | add_noise_uv(x, y, grain); |
359 | 0 | } |
360 | | |
361 | | // Special case for overlapped column |
362 | 0 | for (int x = 0; x < xstart; x++) { |
363 | 0 | int grain = sample_lut(grain_lut, offsets, sx, sy, 0, 0, x, y); |
364 | 0 | int old = sample_lut(grain_lut, offsets, sx, sy, 1, 0, x, y); |
365 | 0 | grain = round2(old * w[sx][x][0] + grain * w[sx][x][1], 5); |
366 | 0 | grain = iclip(grain, grain_min, grain_max); |
367 | 0 | add_noise_uv(x, y, grain); |
368 | 0 | } |
369 | 0 | } |
370 | |
|
371 | 0 | for (int y = 0; y < ystart; y++) { |
372 | | // Special case for overlapped row (sans corner) |
373 | 0 | for (int x = xstart; x < bw; x++) { |
374 | 0 | int grain = sample_lut(grain_lut, offsets, sx, sy, 0, 0, x, y); |
375 | 0 | int old = sample_lut(grain_lut, offsets, sx, sy, 0, 1, x, y); |
376 | 0 | grain = round2(old * w[sy][y][0] + grain * w[sy][y][1], 5); |
377 | 0 | grain = iclip(grain, grain_min, grain_max); |
378 | 0 | add_noise_uv(x, y, grain); |
379 | 0 | } |
380 | | |
381 | | // Special case for doubly-overlapped corner |
382 | 0 | for (int x = 0; x < xstart; x++) { |
383 | | // Blend the top pixel with the top left block |
384 | 0 | int top = sample_lut(grain_lut, offsets, sx, sy, 0, 1, x, y); |
385 | 0 | int old = sample_lut(grain_lut, offsets, sx, sy, 1, 1, x, y); |
386 | 0 | top = round2(old * w[sx][x][0] + top * w[sx][x][1], 5); |
387 | 0 | top = iclip(top, grain_min, grain_max); |
388 | | |
389 | | // Blend the current pixel with the left block |
390 | 0 | int grain = sample_lut(grain_lut, offsets, sx, sy, 0, 0, x, y); |
391 | 0 | old = sample_lut(grain_lut, offsets, sx, sy, 1, 0, x, y); |
392 | 0 | grain = round2(old * w[sx][x][0] + grain * w[sx][x][1], 5); |
393 | 0 | grain = iclip(grain, grain_min, grain_max); |
394 | | |
395 | | // Mix the row rows together and apply to image |
396 | 0 | grain = round2(top * w[sy][y][0] + grain * w[sy][y][1], 5); |
397 | 0 | grain = iclip(grain, grain_min, grain_max); |
398 | 0 | add_noise_uv(x, y, grain); |
399 | 0 | } |
400 | 0 | } |
401 | 0 | } |
402 | 0 | } |
403 | | |
404 | | #define fguv_ss_fn(nm, ss_x, ss_y) \ |
405 | 0 | static decl_fguv_32x32xn_fn(fguv_32x32xn_##nm##_c) { \ |
406 | 0 | fguv_32x32xn_c(dst_row, src_row, stride, data, pw, scaling, grain_lut, bh, \ |
407 | 0 | row_num, luma_row, luma_stride, uv_pl, is_id, ss_x, ss_y \ |
408 | 0 | HIGHBD_TAIL_SUFFIX); \ |
409 | 0 | } Unexecuted instantiation: filmgrain_tmpl.c:fguv_32x32xn_420_c Unexecuted instantiation: filmgrain_tmpl.c:fguv_32x32xn_422_c Unexecuted instantiation: filmgrain_tmpl.c:fguv_32x32xn_444_c |
410 | | |
411 | | fguv_ss_fn(420, 1, 1); |
412 | | fguv_ss_fn(422, 1, 0); |
413 | | fguv_ss_fn(444, 0, 0); |
414 | | |
415 | | #if HAVE_ASM |
416 | | #if ARCH_AARCH64 || ARCH_ARM |
417 | | #include "src/arm/filmgrain.h" |
418 | | #elif ARCH_X86 |
419 | | #include "src/x86/filmgrain.h" |
420 | | #endif |
421 | | #endif |
422 | | |
423 | 19.6k | COLD void bitfn(dav1d_film_grain_dsp_init)(Dav1dFilmGrainDSPContext *const c) { |
424 | 19.6k | c->generate_grain_y = generate_grain_y_c; |
425 | 19.6k | c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I420 - 1] = generate_grain_uv_420_c; |
426 | 19.6k | c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I422 - 1] = generate_grain_uv_422_c; |
427 | 19.6k | c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I444 - 1] = generate_grain_uv_444_c; |
428 | | |
429 | 19.6k | c->fgy_32x32xn = fgy_32x32xn_c; |
430 | 19.6k | c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I420 - 1] = fguv_32x32xn_420_c; |
431 | 19.6k | c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I422 - 1] = fguv_32x32xn_422_c; |
432 | 19.6k | c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I444 - 1] = fguv_32x32xn_444_c; |
433 | | |
434 | 19.6k | #if HAVE_ASM |
435 | | #if ARCH_AARCH64 || ARCH_ARM |
436 | | film_grain_dsp_init_arm(c); |
437 | | #elif ARCH_X86 |
438 | | film_grain_dsp_init_x86(c); |
439 | 19.6k | #endif |
440 | 19.6k | #endif |
441 | 19.6k | } dav1d_film_grain_dsp_init_8bpc Line | Count | Source | 423 | 7.74k | COLD void bitfn(dav1d_film_grain_dsp_init)(Dav1dFilmGrainDSPContext *const c) { | 424 | 7.74k | c->generate_grain_y = generate_grain_y_c; | 425 | 7.74k | c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I420 - 1] = generate_grain_uv_420_c; | 426 | 7.74k | c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I422 - 1] = generate_grain_uv_422_c; | 427 | 7.74k | c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I444 - 1] = generate_grain_uv_444_c; | 428 | | | 429 | 7.74k | c->fgy_32x32xn = fgy_32x32xn_c; | 430 | 7.74k | c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I420 - 1] = fguv_32x32xn_420_c; | 431 | 7.74k | c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I422 - 1] = fguv_32x32xn_422_c; | 432 | 7.74k | c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I444 - 1] = fguv_32x32xn_444_c; | 433 | | | 434 | 7.74k | #if HAVE_ASM | 435 | | #if ARCH_AARCH64 || ARCH_ARM | 436 | | film_grain_dsp_init_arm(c); | 437 | | #elif ARCH_X86 | 438 | | film_grain_dsp_init_x86(c); | 439 | 7.74k | #endif | 440 | 7.74k | #endif | 441 | 7.74k | } |
dav1d_film_grain_dsp_init_16bpc Line | Count | Source | 423 | 11.8k | COLD void bitfn(dav1d_film_grain_dsp_init)(Dav1dFilmGrainDSPContext *const c) { | 424 | 11.8k | c->generate_grain_y = generate_grain_y_c; | 425 | 11.8k | c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I420 - 1] = generate_grain_uv_420_c; | 426 | 11.8k | c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I422 - 1] = generate_grain_uv_422_c; | 427 | 11.8k | c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I444 - 1] = generate_grain_uv_444_c; | 428 | | | 429 | 11.8k | c->fgy_32x32xn = fgy_32x32xn_c; | 430 | 11.8k | c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I420 - 1] = fguv_32x32xn_420_c; | 431 | 11.8k | c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I422 - 1] = fguv_32x32xn_422_c; | 432 | 11.8k | c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I444 - 1] = fguv_32x32xn_444_c; | 433 | | | 434 | 11.8k | #if HAVE_ASM | 435 | | #if ARCH_AARCH64 || ARCH_ARM | 436 | | film_grain_dsp_init_arm(c); | 437 | | #elif ARCH_X86 | 438 | | film_grain_dsp_init_x86(c); | 439 | 11.8k | #endif | 440 | 11.8k | #endif | 441 | 11.8k | } |
|