/work/dav1d/src/filmgrain_tmpl.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright © 2018, Niklas Haas |
3 | | * Copyright © 2018, VideoLAN and dav1d authors |
4 | | * Copyright © 2018, Two Orioles, LLC |
5 | | * All rights reserved. |
6 | | * |
7 | | * Redistribution and use in source and binary forms, with or without |
8 | | * modification, are permitted provided that the following conditions are met: |
9 | | * |
10 | | * 1. Redistributions of source code must retain the above copyright notice, this |
11 | | * list of conditions and the following disclaimer. |
12 | | * |
13 | | * 2. Redistributions in binary form must reproduce the above copyright notice, |
14 | | * this list of conditions and the following disclaimer in the documentation |
15 | | * and/or other materials provided with the distribution. |
16 | | * |
17 | | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
18 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
19 | | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
20 | | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR |
21 | | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
22 | | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
23 | | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
24 | | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
25 | | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
26 | | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
27 | | */ |
28 | | |
29 | | #include "common/attributes.h" |
30 | | #include "common/intops.h" |
31 | | |
32 | | #include "src/filmgrain.h" |
33 | | #include "src/tables.h" |
34 | | |
/* Grain block dimensions that generate_grain_uv_c() selects, instead of
 * GRAIN_WIDTH / GRAIN_HEIGHT, for an axis whose subx / suby flag is set. */
35 | 1.26k | #define SUB_GRAIN_WIDTH 44 |
36 | 1.05k | #define SUB_GRAIN_HEIGHT 38 |
37 | | |
/*
 * Advances the shift-register random state in *state by one step and returns
 * the top `bits` bits of the updated value, viewed as a 16-bit quantity.
 *
 * The feedback bit is the XOR of bits 0, 1, 3 and 12 of the old state; the
 * old state is shifted right by one and the feedback bit is placed at bit 15.
 *
 * bits:  number of result bits to extract.
 * state: in/out random state, overwritten with the new value.
 * NOTE(review): the arithmetic treats the state as 16 bits wide; presumably
 * callers only pass values that fit in 16 bits - confirm.
 */
38 | 33.9M | static inline int get_random_number(const int bits, unsigned *const state) { |
39 | 33.9M | const int r = *state; |
40 | 33.9M | unsigned bit = ((r >> 0) ^ (r >> 1) ^ (r >> 3) ^ (r >> 12)) & 1; |
41 | 33.9M | *state = (r >> 1) | (bit << 15); |
42 | | |
43 | 33.9M | return (*state >> (16 - bits)) & ((1 << bits) - 1); |
44 | 33.9M | } |
45 | | |
/*
 * Shifts x right by `shift` bits after adding half of (1 << shift) as a
 * rounding term. For shift == 0 the rounding term is 0 and x is returned
 * unchanged.
 * NOTE(review): x is negative at some call sites (grain values and filter
 * sums are signed), so the result relies on how the compiler implements
 * right shift of a negative int.
 */
46 | 96.6M | static inline int round2(const int x, const uint64_t shift) { |
47 | 96.6M | return (x + ((1 << shift) >> 1)) >> shift; |
48 | 96.6M | } |
49 | | |
/*
 * Fills the luma grain buffer `buf` (GRAIN_HEIGHT rows of GRAIN_WIDTH
 * entries) in two passes:
 *   1. every entry is set to a value from dav1d_gaussian_sequence, indexed
 *      by an 11-bit random number and scaled down with round2();
 *   2. an in-place filter adds a weighted sum of already-processed
 *      neighbours (weights from data->ar_coeffs_y) to each entry and clips
 *      the result to [grain_min, grain_max].
 *
 * buf:  output grain buffer.
 * data: film grain parameters; this function reads seed, grain_scale_shift,
 *       ar_coeff_lag, ar_coeffs_y and ar_coeff_shift.
 * HIGHBD_DECL_SUFFIX: presumably declares the bitdepth_max parameter used
 *       below in high-bitdepth builds - confirm against the macro definition.
 */
50 | | static void generate_grain_y_c(entry buf[][GRAIN_WIDTH], |
51 | | const Dav1dFilmGrainData *const data |
52 | | HIGHBD_DECL_SUFFIX) |
53 | 2.73k | { |
54 | 2.73k | const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8; |
55 | 2.73k | unsigned seed = data->seed; |
56 | 2.73k | const int shift = 4 - bitdepth_min_8 + data->grain_scale_shift; |
57 | 2.73k | const int grain_ctr = 128 << bitdepth_min_8; |
58 | 2.73k | const int grain_min = -grain_ctr, grain_max = grain_ctr - 1; |
59 | | |
    /* Pass 1: random fill of the whole buffer. */
60 | 202k | for (int y = 0; y < GRAIN_HEIGHT; y++) { |
61 | 16.5M | for (int x = 0; x < GRAIN_WIDTH; x++) { |
62 | 16.3M | const int value = get_random_number(11, &seed); |
63 | 16.3M | buf[y][x] = round2(dav1d_gaussian_sequence[ value ], shift); |
64 | 16.3M | } |
65 | 199k | } |
66 | | |
    /* Pass 2: the first ar_pad rows and the ar_pad leftmost/rightmost columns
     * are skipped, which keeps the neighbour accesses below inside the buffer
     * as long as ar_lag <= ar_pad (ar_lag comes from data; range not visible
     * here). */
67 | 2.73k | const int ar_pad = 3; |
68 | 2.73k | const int ar_lag = data->ar_coeff_lag; |
69 | | |
70 | 194k | for (int y = ar_pad; y < GRAIN_HEIGHT; y++) { |
71 | 14.7M | for (int x = ar_pad; x < GRAIN_WIDTH - ar_pad; x++) { |
72 | 14.5M | const int8_t *coeff = data->ar_coeffs_y; |
73 | 14.5M | int sum = 0; |
            /* Walk the rows above and the current row; coefficients are
             * consumed in the same order. Accumulation stops when the current
             * sample itself (dx == 0, dy == 0) is reached. */
74 | 45.4M | for (int dy = -ar_lag; dy <= 0; dy++) { |
75 | 133M | for (int dx = -ar_lag; dx <= ar_lag; dx++) { |
76 | 116M | if (!dx && !dy) |
77 | 14.5M | break; |
78 | 102M | sum += *(coeff++) * buf[y + dy][x + dx]; |
79 | 102M | } |
80 | 30.9M | } |
81 | | |
82 | 14.5M | const int grain = buf[y][x] + round2(sum, data->ar_coeff_shift); |
83 | 14.5M | buf[y][x] = iclip(grain, grain_min, grain_max); |
84 | 14.5M | } |
85 | 191k | } |
86 | 2.73k | } |
87 | | |
/*
 * Fills a chroma grain buffer `buf`. Structure mirrors the luma generator:
 * a random fill from dav1d_gaussian_sequence followed by an in-place filter
 * using data->ar_coeffs_uv[uv]. In addition, when data->num_y_points is
 * non-zero, the filter adds a contribution from the co-located luma grain
 * in `buf_y`.
 *
 * buf:   output chroma grain buffer.
 * buf_y: luma grain buffer, only read.
 * data:  film grain parameters (seed, grain_scale_shift, ar_coeff_lag,
 *        ar_coeffs_uv, ar_coeff_shift, num_y_points are read here).
 * uv:    selects the coefficient set ar_coeffs_uv[uv] and the constant that
 *        is XORed into the seed (0x49d8 when non-zero, 0xb524 when zero).
 * subx, suby: 0 or 1 at the call sites in this file; select the subsampled
 *        width/height and the luma sampling footprint.
 * HIGHBD_DECL_SUFFIX: presumably declares bitdepth_max - confirm.
 */
88 | | static NOINLINE void |
89 | | generate_grain_uv_c(entry buf[][GRAIN_WIDTH], |
90 | | const entry buf_y[][GRAIN_WIDTH], |
91 | | const Dav1dFilmGrainData *const data, const intptr_t uv, |
92 | | const int subx, const int suby HIGHBD_DECL_SUFFIX) |
93 | 3.77k | { |
94 | 3.77k | const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8; |
95 | 3.77k | unsigned seed = data->seed ^ (uv ? 0x49d8 : 0xb524); |
96 | 3.77k | const int shift = 4 - bitdepth_min_8 + data->grain_scale_shift; |
97 | 3.77k | const int grain_ctr = 128 << bitdepth_min_8; |
98 | 3.77k | const int grain_min = -grain_ctr, grain_max = grain_ctr - 1; |
99 | | |
    /* Only the top-left chromaW x chromaH region of buf is generated. */
100 | 3.77k | const int chromaW = subx ? SUB_GRAIN_WIDTH : GRAIN_WIDTH; |
101 | 3.77k | const int chromaH = suby ? SUB_GRAIN_HEIGHT : GRAIN_HEIGHT; |
102 | | |
103 | 242k | for (int y = 0; y < chromaH; y++) { |
104 | 17.7M | for (int x = 0; x < chromaW; x++) { |
105 | 17.4M | const int value = get_random_number(11, &seed); |
106 | 17.4M | buf[y][x] = round2(dav1d_gaussian_sequence[ value ], shift); |
107 | 17.4M | } |
108 | 238k | } |
109 | | |
110 | 3.77k | const int ar_pad = 3; |
111 | 3.77k | const int ar_lag = data->ar_coeff_lag; |
112 | | |
113 | 231k | for (int y = ar_pad; y < chromaH; y++) { |
114 | 15.5M | for (int x = ar_pad; x < chromaW - ar_pad; x++) { |
115 | 15.3M | const int8_t *coeff = data->ar_coeffs_uv[uv]; |
116 | 15.3M | int sum = 0; |
117 | 49.5M | for (int dy = -ar_lag; dy <= 0; dy++) { |
118 | 152M | for (int dx = -ar_lag; dx <= ar_lag; dx++) { |
119 | | // For the final (current) pixel, we need to add in the |
120 | | // contribution from the luma grain texture |
121 | 133M | if (!dx && !dy) { |
122 | 15.3M | if (!data->num_y_points) |
123 | 5.88M | break; |
                    /* Sum the (1 << subx) x (1 << suby) luma grain samples
                     * that map onto this chroma position, round the sum by
                     * subx + suby bits, and weight it with the coefficient
                     * that follows the neighbour coefficients. */
124 | 9.44M | int luma = 0; |
125 | 9.44M | const int lumaX = ((x - ar_pad) << subx) + ar_pad; |
126 | 9.44M | const int lumaY = ((y - ar_pad) << suby) + ar_pad; |
127 | 20.1M | for (int i = 0; i <= suby; i++) { |
128 | 23.9M | for (int j = 0; j <= subx; j++) { |
129 | 13.2M | luma += buf_y[lumaY + i][lumaX + j]; |
130 | 13.2M | } |
131 | 10.6M | } |
132 | 9.44M | luma = round2(luma, subx + suby); |
133 | 9.44M | sum += luma * (*coeff); |
134 | 9.44M | break; |
135 | 15.3M | } |
136 | | |
137 | 118M | sum += *(coeff++) * buf[y + dy][x + dx]; |
138 | 118M | } |
139 | 34.2M | } |
140 | | |
141 | 15.3M | const int grain = buf[y][x] + round2(sum, data->ar_coeff_shift); |
142 | 15.3M | buf[y][x] = iclip(grain, grain_min, grain_max); |
143 | 15.3M | } |
144 | 227k | } |
145 | 3.77k | } |
146 | | |
/*
 * gnuv_ss_fn(nm, ss_x, ss_y) defines a static function named
 * generate_grain_uv_<nm>_c, declared through decl_generate_grain_uv_fn,
 * that forwards its arguments to generate_grain_uv_c() with the subsampling
 * flags fixed to ss_x / ss_y.
 * The rows following the closing brace appear to be the coverage tool's
 * per-instantiation counts (420, 422, 444) for the same three macro lines.
 */
147 | | #define gnuv_ss_fn(nm, ss_x, ss_y) \ |
148 | 3.77k | static decl_generate_grain_uv_fn(generate_grain_uv_##nm##_c) { \ |
149 | 3.77k | generate_grain_uv_c(buf, buf_y, data, uv, ss_x, ss_y HIGHBD_TAIL_SUFFIX); \ |
150 | 3.77k | } filmgrain_tmpl.c:generate_grain_uv_420_c Line | Count | Source | 148 | 1.05k | static decl_generate_grain_uv_fn(generate_grain_uv_##nm##_c) { \ | 149 | 1.05k | generate_grain_uv_c(buf, buf_y, data, uv, ss_x, ss_y HIGHBD_TAIL_SUFFIX); \ | 150 | 1.05k | } |
filmgrain_tmpl.c:generate_grain_uv_422_c Line | Count | Source | 148 | 207 | static decl_generate_grain_uv_fn(generate_grain_uv_##nm##_c) { \ | 149 | 207 | generate_grain_uv_c(buf, buf_y, data, uv, ss_x, ss_y HIGHBD_TAIL_SUFFIX); \ | 150 | 207 | } |
filmgrain_tmpl.c:generate_grain_uv_444_c Line | Count | Source | 148 | 2.51k | static decl_generate_grain_uv_fn(generate_grain_uv_##nm##_c) { \ | 149 | 2.51k | generate_grain_uv_c(buf, buf_y, data, uv, ss_x, ss_y HIGHBD_TAIL_SUFFIX); \ | 150 | 2.51k | } |
|
151 | | |
/* One wrapper per layout: 420 sets both flags, 422 only ss_x, 444 neither. */
152 | | gnuv_ss_fn(420, 1, 1); |
153 | | gnuv_ss_fn(422, 1, 0); |
154 | | gnuv_ss_fn(444, 0, 0); |
155 | | |
/*
 * Returns one entry of grain_lut for position (x, y) inside a block.
 *
 * offsets:    cached random values; offsets[bx][by] is the one used. The
 *             callers in this file keep the current block at index 0 and the
 *             previous (left / upper) one at index 1.
 * subx, suby: subsampling flags; they halve the random offset scale
 *             (2 >> sub) and the per-block step (FG_BLOCK_SIZE >> sub).
 * bx, by:     0 or 1 at the call sites; besides selecting the cached offset,
 *             a value of 1 advances the read position by one block step.
 * x, y:       position inside the block.
 *
 * The upper bits of the random value (randval >> 4) drive the horizontal
 * offset and the low 4 bits the vertical offset.
 */
156 | | // samples from the correct block of a grain LUT, while taking into account the |
157 | | // offsets provided by the offsets cache |
158 | | static inline entry sample_lut(const entry grain_lut[][GRAIN_WIDTH], |
159 | | const int offsets[2][2], const int subx, const int suby, |
160 | | const int bx, const int by, const int x, const int y) |
161 | 23.7M | { |
162 | 23.7M | const int randval = offsets[bx][by]; |
163 | 23.7M | const int offx = 3 + (2 >> subx) * (3 + (randval >> 4)); |
164 | 23.7M | const int offy = 3 + (2 >> suby) * (3 + (randval & 0xF)); |
165 | 23.7M | return grain_lut[offy + y + (FG_BLOCK_SIZE >> suby) * by] |
166 | 23.7M | [offx + x + (FG_BLOCK_SIZE >> subx) * bx]; |
167 | 23.7M | } |
168 | | |
/*
 * Applies luma film grain to one horizontal strip of the picture, processed
 * in blocks of FG_BLOCK_SIZE columns. For every pixel:
 *     dst = clip(src + round2(scaling[src] * grain, data->scaling_shift))
 * where grain comes from grain_lut via sample_lut(). When data->overlap_flag
 * is set, the first columns of each block after the first and the first rows
 * of every strip with row_num > 0 blend the grain of the current block with
 * that of the neighbouring block before it is applied.
 *
 * dst_row, src_row: first pixel of the strip in the destination / source.
 * stride:    row pitch of both buffers; asserted to be a multiple of
 *            FG_BLOCK_SIZE * sizeof(pixel).
 * data:      film grain parameters (overlap_flag, clip_to_restricted_range,
 *            seed and scaling_shift are read here).
 * pw:        number of pixel columns to process.
 * scaling:   lookup table indexed by the source pixel value.
 * grain_lut: luma grain buffer.
 * bh:        number of rows in this strip.
 * row_num:   strip index; feeds the per-strip seed and enables the vertical
 *            overlap when greater than zero.
 */
169 | | static void fgy_32x32xn_c(pixel *const dst_row, const pixel *const src_row, |
170 | | const ptrdiff_t stride, |
171 | | const Dav1dFilmGrainData *const data, const size_t pw, |
172 | | const uint8_t scaling[SCALING_SIZE], |
173 | | const entry grain_lut[][GRAIN_WIDTH], |
174 | | const int bh, const int row_num HIGHBD_DECL_SUFFIX) |
175 | 15.1k | { |
    /* rows == 2 when the strip above must also be tracked for blending. */
176 | 15.1k | const int rows = 1 + (data->overlap_flag && row_num > 0); |
177 | 15.1k | const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8; |
178 | 15.1k | const int grain_ctr = 128 << bitdepth_min_8; |
179 | 15.1k | const int grain_min = -grain_ctr, grain_max = grain_ctr - 1; |
180 | | |
    /* Output clipping range: 16..235 scaled to the bit depth when restricted,
     * otherwise the full 0..BITDEPTH_MAX range. */
181 | 15.1k | int min_value, max_value; |
182 | 15.1k | if (data->clip_to_restricted_range) { |
183 | 7.86k | min_value = 16 << bitdepth_min_8; |
184 | 7.86k | max_value = 235 << bitdepth_min_8; |
185 | 7.86k | } else { |
186 | 7.31k | min_value = 0; |
187 | 7.31k | max_value = BITDEPTH_MAX; |
188 | 7.31k | } |
189 | | |
190 | | // seed[0] contains the current row, seed[1] contains the previous |
191 | 15.1k | unsigned seed[2]; |
192 | 39.0k | for (int i = 0; i < rows; i++) { |
193 | 23.8k | seed[i] = data->seed; |
194 | 23.8k | seed[i] ^= (((row_num - i) * 37 + 178) & 0xFF) << 8; |
195 | 23.8k | seed[i] ^= (((row_num - i) * 173 + 105) & 0xFF); |
196 | 23.8k | } |
197 | | |
198 | 15.1k | assert(stride % (FG_BLOCK_SIZE * sizeof(pixel)) == 0); |
199 | | |
200 | 15.1k | int offsets[2 /* col offset */][2 /* row offset */]; |
201 | | |
202 | | // process this row in FG_BLOCK_SIZE^2 blocks |
203 | 41.8k | for (unsigned bx = 0; bx < pw; bx += FG_BLOCK_SIZE) { |
        /* Width of this block; the last block may be narrower. */
204 | 26.7k | const int bw = imin(FG_BLOCK_SIZE, (int) pw - bx); |
205 | | |
206 | 26.7k | if (data->overlap_flag && bx) { |
207 | | // shift previous offsets left |
208 | 26.2k | for (int i = 0; i < rows; i++) |
209 | 16.6k | offsets[1][i] = offsets[0][i]; |
210 | 9.67k | } |
211 | | |
212 | | // update current offsets |
213 | 68.7k | for (int i = 0; i < rows; i++) |
214 | 42.0k | offsets[0][i] = get_random_number(8, &seed[i]); |
215 | | |
216 | | // x/y block offsets to compensate for overlapped regions |
217 | 26.7k | const int ystart = data->overlap_flag && row_num ? imin(2, bh) : 0; |
218 | 26.7k | const int xstart = data->overlap_flag && bx ? imin(2, bw) : 0; |
219 | | |
        /* Blend weights per overlap position, used below as
         * old * w[pos][0] + new * w[pos][1], then rounded by 5 bits. */
220 | 26.7k | static const int w[2][2] = { { 27, 17 }, { 17, 27 } }; |
221 | | |
        /* add_noise_y expands to declarations of src, dst and noise, so it
         * is used once per enclosing scope. It reads bx, scaling, data,
         * min_value and max_value from the surrounding function. */
222 | 26.7k | #define add_noise_y(x, y, grain) \ |
223 | 10.8M | const pixel *const src = src_row + (y) * PXSTRIDE(stride) + (x) + bx; \ |
224 | 10.8M | pixel *const dst = dst_row + (y) * PXSTRIDE(stride) + (x) + bx; \ |
225 | 10.8M | const int noise = round2(scaling[ *src ] * (grain), data->scaling_shift); \ |
226 | 10.8M | *dst = iclip(*src + noise, min_value, max_value); |
227 | | |
228 | 686k | for (int y = ystart; y < bh; y++) { |
229 | | // Non-overlapped image region (straightforward) |
230 | 10.5M | for (int x = xstart; x < bw; x++) { |
231 | 9.85M | int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y); |
232 | 9.85M | add_noise_y(x, y, grain); |
233 | 9.85M | } |
234 | | |
235 | | // Special case for overlapped column |
236 | 1.11M | for (int x = 0; x < xstart; x++) { |
237 | 457k | int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y); |
238 | 457k | int old = sample_lut(grain_lut, offsets, 0, 0, 1, 0, x, y); |
239 | 457k | grain = round2(old * w[x][0] + grain * w[x][1], 5); |
240 | 457k | grain = iclip(grain, grain_min, grain_max); |
241 | 457k | add_noise_y(x, y, grain); |
242 | 457k | } |
243 | 659k | } |
244 | | |
245 | 57.2k | for (int y = 0; y < ystart; y++) { |
246 | | // Special case for overlapped row (sans corner) |
247 | 585k | for (int x = xstart; x < bw; x++) { |
248 | 555k | int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y); |
249 | 555k | int old = sample_lut(grain_lut, offsets, 0, 0, 0, 1, x, y); |
250 | 555k | grain = round2(old * w[y][0] + grain * w[y][1], 5); |
251 | 555k | grain = iclip(grain, grain_min, grain_max); |
252 | 555k | add_noise_y(x, y, grain); |
253 | 555k | } |
254 | | |
255 | | // Special case for doubly-overlapped corner |
256 | 57.4k | for (int x = 0; x < xstart; x++) { |
257 | | // Blend the top pixel with the top left block |
258 | 26.8k | int top = sample_lut(grain_lut, offsets, 0, 0, 0, 1, x, y); |
259 | 26.8k | int old = sample_lut(grain_lut, offsets, 0, 0, 1, 1, x, y); |
260 | 26.8k | top = round2(old * w[x][0] + top * w[x][1], 5); |
261 | 26.8k | top = iclip(top, grain_min, grain_max); |
262 | | |
263 | | // Blend the current pixel with the left block |
264 | 26.8k | int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y); |
265 | 26.8k | old = sample_lut(grain_lut, offsets, 0, 0, 1, 0, x, y); |
266 | 26.8k | grain = round2(old * w[x][0] + grain * w[x][1], 5); |
267 | 26.8k | grain = iclip(grain, grain_min, grain_max); |
268 | | |
269 | | // Mix the two rows together and apply grain |
270 | 26.8k | grain = round2(top * w[y][0] + grain * w[y][1], 5); |
271 | 26.8k | grain = iclip(grain, grain_min, grain_max); |
272 | 26.8k | add_noise_y(x, y, grain); |
273 | 26.8k | } |
274 | 30.5k | } |
275 | 26.7k | } |
276 | 15.1k | } |
277 | | |
/*
 * Applies chroma film grain to one horizontal strip of a chroma plane,
 * processed in blocks of (FG_BLOCK_SIZE >> sx) columns. The grain for each
 * pixel comes from grain_lut via sample_lut(); the scaling table index is
 * derived from the co-located luma (and, unless
 * data->chroma_scaling_from_luma is set, from the chroma source pixel too).
 * Overlap blending follows the same four-region scheme as the luma
 * function, with the overlap size and weights depending on sx / sy.
 *
 * dst_row, src_row: first pixel of the strip in the destination / source.
 * stride:      row pitch of the chroma buffers; asserted to be a multiple of
 *              FG_BLOCK_SIZE * sizeof(pixel).
 * data:        film grain parameters.
 * pw:          number of chroma pixel columns to process.
 * scaling:     lookup table indexed by the value computed in add_noise_uv.
 * grain_lut:   chroma grain buffer.
 * bh:          number of chroma rows in this strip.
 * row_num:     strip index; feeds the seed and enables vertical overlap.
 * luma_row, luma_stride: luma pixels of the same strip and their row pitch.
 * uv:          index into data->uv_luma_mult / uv_mult / uv_offset.
 * is_id:       when non-zero, the restricted-range maximum is 235 instead
 *              of 240 (scaled to the bit depth).
 * sx, sy:      horizontal / vertical subsampling flags (0 or 1 at the call
 *              sites in this file).
 */
278 | | static NOINLINE void |
279 | | fguv_32x32xn_c(pixel *const dst_row, const pixel *const src_row, |
280 | | const ptrdiff_t stride, const Dav1dFilmGrainData *const data, |
281 | | const size_t pw, const uint8_t scaling[SCALING_SIZE], |
282 | | const entry grain_lut[][GRAIN_WIDTH], const int bh, |
283 | | const int row_num, const pixel *const luma_row, |
284 | | const ptrdiff_t luma_stride, const int uv, const int is_id, |
285 | | const int sx, const int sy HIGHBD_DECL_SUFFIX) |
286 | 33.3k | { |
287 | 33.3k | const int rows = 1 + (data->overlap_flag && row_num > 0); |
288 | 33.3k | const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8; |
289 | 33.3k | const int grain_ctr = 128 << bitdepth_min_8; |
290 | 33.3k | const int grain_min = -grain_ctr, grain_max = grain_ctr - 1; |
291 | | |
292 | 33.3k | int min_value, max_value; |
293 | 33.3k | if (data->clip_to_restricted_range) { |
294 | 20.7k | min_value = 16 << bitdepth_min_8; |
295 | 20.7k | max_value = (is_id ? 235 : 240) << bitdepth_min_8; |
296 | 20.7k | } else { |
297 | 12.5k | min_value = 0; |
298 | 12.5k | max_value = BITDEPTH_MAX; |
299 | 12.5k | } |
300 | | |
301 | | // seed[0] contains the current row, seed[1] contains the previous |
302 | 33.3k | unsigned seed[2]; |
303 | 86.3k | for (int i = 0; i < rows; i++) { |
304 | 53.0k | seed[i] = data->seed; |
305 | 53.0k | seed[i] ^= (((row_num - i) * 37 + 178) & 0xFF) << 8; |
306 | 53.0k | seed[i] ^= (((row_num - i) * 173 + 105) & 0xFF); |
307 | 53.0k | } |
308 | | |
309 | 33.3k | assert(stride % (FG_BLOCK_SIZE * sizeof(pixel)) == 0); |
310 | | |
311 | 33.3k | int offsets[2 /* col offset */][2 /* row offset */]; |
312 | | |
313 | | // process this row in FG_BLOCK_SIZE^2 blocks (subsampled) |
314 | 82.8k | for (unsigned bx = 0; bx < pw; bx += FG_BLOCK_SIZE >> sx) { |
315 | 49.5k | const int bw = imin(FG_BLOCK_SIZE >> sx, (int)(pw - bx)); |
316 | 49.5k | if (data->overlap_flag && bx) { |
317 | | // shift previous offsets left |
318 | 34.6k | for (int i = 0; i < rows; i++) |
319 | 21.6k | offsets[1][i] = offsets[0][i]; |
320 | 13.0k | } |
321 | | |
322 | | // update current offsets |
323 | 126k | for (int i = 0; i < rows; i++) |
324 | 76.5k | offsets[0][i] = get_random_number(8, &seed[i]); |
325 | | |
326 | | // x/y block offsets to compensate for overlapped regions |
327 | 49.5k | const int ystart = data->overlap_flag && row_num ? imin(2 >> sy, bh) : 0; |
328 | 49.5k | const int xstart = data->overlap_flag && bx ? imin(2 >> sx, bw) : 0; |
329 | | |
        /* Blend weights indexed by [subsampling flag][overlap position].
         * The subsampled case has a single overlap position, matching the
         * (2 >> 1) == 1 limit on xstart / ystart above. */
330 | 49.5k | static const int w[2 /* sub */][2 /* off */][2] = { |
331 | 49.5k | { { 27, 17 }, { 17, 27 } }, |
332 | 49.5k | { { 23, 22 } }, |
333 | 49.5k | }; |
334 | | |
        /* add_noise_uv expands to declarations, so it is used once per
         * enclosing scope. It locates the luma sample at ((bx + x) << sx,
         * y << sy), averages it with its right neighbour when sx is set,
         * and uses that average as the scaling index. Unless
         * chroma_scaling_from_luma is set, the index is instead
         * (avg * uv_luma_mult + src * uv_mult) >> 6 plus uv_offset scaled to
         * the bit depth, clipped with iclip_pixel(). */
335 | 49.5k | #define add_noise_uv(x, y, grain) \ |
336 | 13.9M | const int lx = (bx + x) << sx; \ |
337 | 13.9M | const int ly = y << sy; \ |
338 | 13.9M | const pixel *const luma = luma_row + ly * PXSTRIDE(luma_stride) + lx; \ |
339 | 13.9M | pixel avg = luma[0]; \ |
340 | 13.9M | if (sx) \ |
341 | 13.9M | avg = (avg + luma[1] + 1) >> 1; \ |
342 | 13.9M | const pixel *const src = src_row + (y) * PXSTRIDE(stride) + (bx + (x)); \ |
343 | 13.9M | pixel *const dst = dst_row + (y) * PXSTRIDE(stride) + (bx + (x)); \ |
344 | 13.9M | int val = avg; \ |
345 | 13.9M | if (!data->chroma_scaling_from_luma) { \ |
346 | 5.62M | const int combined = avg * data->uv_luma_mult[uv] + \ |
347 | 5.62M | *src * data->uv_mult[uv]; \ |
348 | 5.62M | val = iclip_pixel( (combined >> 6) + \ |
349 | 5.62M | (data->uv_offset[uv] * (1 << bitdepth_min_8)) ); \ |
350 | 5.62M | } \ |
351 | 13.9M | const int noise = round2(scaling[ val ] * (grain), data->scaling_shift); \ |
352 | 13.9M | *dst = iclip(*src + noise, min_value, max_value); |
353 | | |
354 | 1.08M | for (int y = ystart; y < bh; y++) { |
355 | | // Non-overlapped image region (straightforward) |
356 | 13.8M | for (int x = xstart; x < bw; x++) { |
357 | 12.8M | int grain = sample_lut(grain_lut, offsets, sx, sy, 0, 0, x, y); |
358 | 12.8M | add_noise_uv(x, y, grain); |
359 | 12.8M | } |
360 | | |
361 | | // Special case for overlapped column |
362 | 1.53M | for (int x = 0; x < xstart; x++) { |
363 | 498k | int grain = sample_lut(grain_lut, offsets, sx, sy, 0, 0, x, y); |
364 | 498k | int old = sample_lut(grain_lut, offsets, sx, sy, 1, 0, x, y); |
365 | 498k | grain = round2(old * w[sx][x][0] + grain * w[sx][x][1], 5); |
366 | 498k | grain = iclip(grain, grain_min, grain_max); |
367 | 498k | add_noise_uv(x, y, grain); |
368 | 498k | } |
369 | 1.03M | } |
370 | | |
371 | 99.9k | for (int y = 0; y < ystart; y++) { |
372 | | // Special case for overlapped row (sans corner) |
373 | 688k | for (int x = xstart; x < bw; x++) { |
374 | 638k | int grain = sample_lut(grain_lut, offsets, sx, sy, 0, 0, x, y); |
375 | 638k | int old = sample_lut(grain_lut, offsets, sx, sy, 0, 1, x, y); |
376 | 638k | grain = round2(old * w[sy][y][0] + grain * w[sy][y][1], 5); |
377 | 638k | grain = iclip(grain, grain_min, grain_max); |
378 | 638k | add_noise_uv(x, y, grain); |
379 | 638k | } |
380 | | |
381 | | // Special case for doubly-overlapped corner |
382 | 77.2k | for (int x = 0; x < xstart; x++) { |
383 | | // Blend the top pixel with the top left block |
384 | 26.8k | int top = sample_lut(grain_lut, offsets, sx, sy, 0, 1, x, y); |
385 | 26.8k | int old = sample_lut(grain_lut, offsets, sx, sy, 1, 1, x, y); |
386 | 26.8k | top = round2(old * w[sx][x][0] + top * w[sx][x][1], 5); |
387 | 26.8k | top = iclip(top, grain_min, grain_max); |
388 | | |
389 | | // Blend the current pixel with the left block |
390 | 26.8k | int grain = sample_lut(grain_lut, offsets, sx, sy, 0, 0, x, y); |
391 | 26.8k | old = sample_lut(grain_lut, offsets, sx, sy, 1, 0, x, y); |
392 | 26.8k | grain = round2(old * w[sx][x][0] + grain * w[sx][x][1], 5); |
393 | 26.8k | grain = iclip(grain, grain_min, grain_max); |
394 | | |
395 | | // Mix the two rows together and apply to image |
396 | 26.8k | grain = round2(top * w[sy][y][0] + grain * w[sy][y][1], 5); |
397 | 26.8k | grain = iclip(grain, grain_min, grain_max); |
398 | 26.8k | add_noise_uv(x, y, grain); |
399 | 26.8k | } |
400 | 50.3k | } |
401 | 49.5k | } |
402 | 33.3k | } |
403 | | |
/*
 * fguv_ss_fn(nm, ss_x, ss_y) defines a static function named
 * fguv_32x32xn_<nm>_c, declared through decl_fguv_32x32xn_fn, that forwards
 * its arguments to fguv_32x32xn_c() with the subsampling flags fixed to
 * ss_x / ss_y.
 * The rows following the closing brace appear to be the coverage tool's
 * per-instantiation counts (420, 422, 444) for the same macro lines.
 */
404 | | #define fguv_ss_fn(nm, ss_x, ss_y) \ |
405 | 33.4k | static decl_fguv_32x32xn_fn(fguv_32x32xn_##nm##_c) { \ |
406 | 33.4k | fguv_32x32xn_c(dst_row, src_row, stride, data, pw, scaling, grain_lut, bh, \ |
407 | 33.4k | row_num, luma_row, luma_stride, uv_pl, is_id, ss_x, ss_y \ |
408 | 33.4k | HIGHBD_TAIL_SUFFIX); \ |
409 | 33.4k | } filmgrain_tmpl.c:fguv_32x32xn_420_c Line | Count | Source | 405 | 4.83k | static decl_fguv_32x32xn_fn(fguv_32x32xn_##nm##_c) { \ | 406 | 4.83k | fguv_32x32xn_c(dst_row, src_row, stride, data, pw, scaling, grain_lut, bh, \ | 407 | 4.83k | row_num, luma_row, luma_stride, uv_pl, is_id, ss_x, ss_y \ | 408 | 4.83k | HIGHBD_TAIL_SUFFIX); \ | 409 | 4.83k | } |
filmgrain_tmpl.c:fguv_32x32xn_422_c Line | Count | Source | 405 | 591 | static decl_fguv_32x32xn_fn(fguv_32x32xn_##nm##_c) { \ | 406 | 591 | fguv_32x32xn_c(dst_row, src_row, stride, data, pw, scaling, grain_lut, bh, \ | 407 | 591 | row_num, luma_row, luma_stride, uv_pl, is_id, ss_x, ss_y \ | 408 | 591 | HIGHBD_TAIL_SUFFIX); \ | 409 | 591 | } |
filmgrain_tmpl.c:fguv_32x32xn_444_c Line | Count | Source | 405 | 27.9k | static decl_fguv_32x32xn_fn(fguv_32x32xn_##nm##_c) { \ | 406 | 27.9k | fguv_32x32xn_c(dst_row, src_row, stride, data, pw, scaling, grain_lut, bh, \ | 407 | 27.9k | row_num, luma_row, luma_stride, uv_pl, is_id, ss_x, ss_y \ | 408 | 27.9k | HIGHBD_TAIL_SUFFIX); \ | 409 | 27.9k | } |
|
410 | | |
/* One wrapper per layout: 420 sets both flags, 422 only ss_x, 444 neither. */
411 | | fguv_ss_fn(420, 1, 1); |
412 | | fguv_ss_fn(422, 1, 0); |
413 | | fguv_ss_fn(444, 0, 0); |
414 | | |
415 | | #if HAVE_ASM |
416 | | #if ARCH_AARCH64 || ARCH_ARM |
417 | | #include "src/arm/filmgrain.h" |
418 | | #elif ARCH_X86 |
419 | | #include "src/x86/filmgrain.h" |
420 | | #endif |
421 | | #endif |
422 | | |
/*
 * Fills the film grain DSP context `c` with the C implementations defined in
 * this file: the luma generator, the three chroma generators, the luma
 * applier and the three chroma appliers. The per-layout arrays are indexed
 * by DAV1D_PIXEL_LAYOUT_* - 1.
 * When HAVE_ASM is set, the ARM or x86 init routine is then called on the
 * same context; presumably it replaces some pointers with optimized
 * versions - not visible here.
 * The function name is wrapped in bitfn(); the coverage rows after the
 * closing brace list 8bpc and 16bpc instantiations.
 */
423 | 38.3k | COLD void bitfn(dav1d_film_grain_dsp_init)(Dav1dFilmGrainDSPContext *const c) { |
424 | 38.3k | c->generate_grain_y = generate_grain_y_c; |
425 | 38.3k | c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I420 - 1] = generate_grain_uv_420_c; |
426 | 38.3k | c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I422 - 1] = generate_grain_uv_422_c; |
427 | 38.3k | c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I444 - 1] = generate_grain_uv_444_c; |
428 | | |
429 | 38.3k | c->fgy_32x32xn = fgy_32x32xn_c; |
430 | 38.3k | c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I420 - 1] = fguv_32x32xn_420_c; |
431 | 38.3k | c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I422 - 1] = fguv_32x32xn_422_c; |
432 | 38.3k | c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I444 - 1] = fguv_32x32xn_444_c; |
433 | | |
434 | | #if HAVE_ASM |
435 | | #if ARCH_AARCH64 || ARCH_ARM |
436 | | film_grain_dsp_init_arm(c); |
437 | | #elif ARCH_X86 |
438 | | film_grain_dsp_init_x86(c); |
439 | | #endif |
440 | | #endif |
441 | 38.3k | } dav1d_film_grain_dsp_init_8bpc Line | Count | Source | 423 | 17.2k | COLD void bitfn(dav1d_film_grain_dsp_init)(Dav1dFilmGrainDSPContext *const c) { | 424 | 17.2k | c->generate_grain_y = generate_grain_y_c; | 425 | 17.2k | c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I420 - 1] = generate_grain_uv_420_c; | 426 | 17.2k | c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I422 - 1] = generate_grain_uv_422_c; | 427 | 17.2k | c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I444 - 1] = generate_grain_uv_444_c; | 428 | | | 429 | 17.2k | c->fgy_32x32xn = fgy_32x32xn_c; | 430 | 17.2k | c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I420 - 1] = fguv_32x32xn_420_c; | 431 | 17.2k | c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I422 - 1] = fguv_32x32xn_422_c; | 432 | 17.2k | c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I444 - 1] = fguv_32x32xn_444_c; | 433 | | | 434 | | #if HAVE_ASM | 435 | | #if ARCH_AARCH64 || ARCH_ARM | 436 | | film_grain_dsp_init_arm(c); | 437 | | #elif ARCH_X86 | 438 | | film_grain_dsp_init_x86(c); | 439 | | #endif | 440 | | #endif | 441 | 17.2k | } |
dav1d_film_grain_dsp_init_16bpc Line | Count | Source | 423 | 21.1k | COLD void bitfn(dav1d_film_grain_dsp_init)(Dav1dFilmGrainDSPContext *const c) { | 424 | 21.1k | c->generate_grain_y = generate_grain_y_c; | 425 | 21.1k | c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I420 - 1] = generate_grain_uv_420_c; | 426 | 21.1k | c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I422 - 1] = generate_grain_uv_422_c; | 427 | 21.1k | c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I444 - 1] = generate_grain_uv_444_c; | 428 | | | 429 | 21.1k | c->fgy_32x32xn = fgy_32x32xn_c; | 430 | 21.1k | c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I420 - 1] = fguv_32x32xn_420_c; | 431 | 21.1k | c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I422 - 1] = fguv_32x32xn_422_c; | 432 | 21.1k | c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I444 - 1] = fguv_32x32xn_444_c; | 433 | | | 434 | | #if HAVE_ASM | 435 | | #if ARCH_AARCH64 || ARCH_ARM | 436 | | film_grain_dsp_init_arm(c); | 437 | | #elif ARCH_X86 | 438 | | film_grain_dsp_init_x86(c); | 439 | | #endif | 440 | | #endif | 441 | 21.1k | } |
|