/src/libavif/ext/dav1d/src/fg_apply_tmpl.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright © 2018, Niklas Haas |
3 | | * Copyright © 2018, VideoLAN and dav1d authors |
4 | | * Copyright © 2018, Two Orioles, LLC |
5 | | * All rights reserved. |
6 | | * |
7 | | * Redistribution and use in source and binary forms, with or without |
8 | | * modification, are permitted provided that the following conditions are met: |
9 | | * |
10 | | * 1. Redistributions of source code must retain the above copyright notice, this |
11 | | * list of conditions and the following disclaimer. |
12 | | * |
13 | | * 2. Redistributions in binary form must reproduce the above copyright notice, |
14 | | * this list of conditions and the following disclaimer in the documentation |
15 | | * and/or other materials provided with the distribution. |
16 | | * |
17 | | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
18 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
19 | | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
20 | | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR |
21 | | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
22 | | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
23 | | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
24 | | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
25 | | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
26 | | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
27 | | */ |
28 | | |
29 | | #include "config.h" |
30 | | |
31 | | #include <stdint.h> |
32 | | |
33 | | #include "dav1d/common.h" |
34 | | #include "dav1d/picture.h" |
35 | | |
36 | | #include "common/intops.h" |
37 | | #include "common/bitdepth.h" |
38 | | |
39 | | #include "src/fg_apply.h" |
40 | | |
41 | | static void generate_scaling(const int bitdepth, |
42 | | const uint8_t points[][2], const int num, |
43 | | uint8_t scaling[SCALING_SIZE]) |
44 | 334 | { |
45 | 334 | #if BITDEPTH == 8 |
46 | 334 | const int shift_x = 0; |
47 | 334 | const int scaling_size = SCALING_SIZE; |
48 | | #else |
49 | | assert(bitdepth > 8); |
50 | | const int shift_x = bitdepth - 8; |
51 | | const int scaling_size = 1 << bitdepth; |
52 | | #endif |
53 | | |
54 | 334 | if (num == 0) { |
55 | 60 | memset(scaling, 0, scaling_size); |
56 | 60 | return; |
57 | 60 | } |
58 | | |
59 | | // Fill up the preceding entries with the initial value |
60 | 274 | memset(scaling, points[0][1], points[0][0] << shift_x); |
61 | | |
62 | | // Linearly interpolate the values in the middle |
63 | 464 | for (int i = 0; i < num - 1; i++) { |
64 | 190 | const int bx = points[i][0]; |
65 | 190 | const int by = points[i][1]; |
66 | 190 | const int ex = points[i+1][0]; |
67 | 190 | const int ey = points[i+1][1]; |
68 | 190 | const int dx = ex - bx; |
69 | 190 | const int dy = ey - by; |
70 | 190 | assert(dx > 0); |
71 | 190 | const int delta = dy * ((0x10000 + (dx >> 1)) / dx); |
72 | 10.9k | for (int x = 0, d = 0x8000; x < dx; x++) { |
73 | 10.7k | scaling[(bx + x) << shift_x] = by + (d >> 16); |
74 | 10.7k | d += delta; |
75 | 10.7k | } |
76 | 190 | } |
77 | | |
78 | | // Fill up the remaining entries with the final value |
79 | 274 | const int n = points[num - 1][0] << shift_x; |
80 | 274 | memset(&scaling[n], points[num - 1][1], scaling_size - n); |
81 | | |
82 | | #if BITDEPTH != 8 |
83 | | const int pad = 1 << shift_x, rnd = pad >> 1; |
84 | | for (int i = 0; i < num - 1; i++) { |
85 | | const int bx = points[i][0] << shift_x; |
86 | | const int ex = points[i+1][0] << shift_x; |
87 | | const int dx = ex - bx; |
88 | | for (int x = 0; x < dx; x += pad) { |
89 | | const int range = scaling[bx + x + pad] - scaling[bx + x]; |
90 | | for (int n = 1, r = rnd; n < pad; n++) { |
91 | | r += range; |
92 | | scaling[bx + x + n] = scaling[bx + x] + (r >> shift_x); |
93 | | } |
94 | | } |
95 | | } |
96 | | #endif |
97 | 274 | } |
98 | | |
99 | | #ifndef UNIT_TEST |
100 | | void bitfn(dav1d_prep_grain)(const Dav1dFilmGrainDSPContext *const dsp, |
101 | | Dav1dPicture *const out, |
102 | | const Dav1dPicture *const in, |
103 | | uint8_t scaling[3][SCALING_SIZE], |
104 | | entry grain_lut[3][GRAIN_HEIGHT+1][GRAIN_WIDTH]) |
105 | 515 | { |
106 | 515 | const Dav1dFilmGrainData *const data = &out->frame_hdr->film_grain.data; |
107 | | #if BITDEPTH != 8 |
108 | | const int bitdepth_max = (1 << out->p.bpc) - 1; |
109 | | #endif |
110 | | |
111 | | // Generate grain LUTs as needed |
112 | 515 | dsp->generate_grain_y(grain_lut[0], data HIGHBD_TAIL_SUFFIX); // always needed |
113 | 515 | if (data->num_uv_points[0] || data->chroma_scaling_from_luma) |
114 | 405 | dsp->generate_grain_uv[in->p.layout - 1](grain_lut[1], grain_lut[0], |
115 | 405 | data, 0 HIGHBD_TAIL_SUFFIX); |
116 | 515 | if (data->num_uv_points[1] || data->chroma_scaling_from_luma) |
117 | 415 | dsp->generate_grain_uv[in->p.layout - 1](grain_lut[2], grain_lut[0], |
118 | 415 | data, 1 HIGHBD_TAIL_SUFFIX); |
119 | | |
120 | | // Generate scaling LUTs as needed |
121 | 515 | if (data->num_y_points || data->chroma_scaling_from_luma) |
122 | 495 | generate_scaling(in->p.bpc, data->y_points, data->num_y_points, scaling[0]); |
123 | 515 | if (data->num_uv_points[0]) |
124 | 51 | generate_scaling(in->p.bpc, data->uv_points[0], data->num_uv_points[0], scaling[1]); |
125 | 515 | if (data->num_uv_points[1]) |
126 | 61 | generate_scaling(in->p.bpc, data->uv_points[1], data->num_uv_points[1], scaling[2]); |
127 | | |
128 | | // Copy over the non-modified planes |
129 | 515 | assert(out->stride[0] == in->stride[0]); |
130 | 515 | if (!data->num_y_points) { |
131 | 111 | const ptrdiff_t stride = out->stride[0]; |
132 | 111 | const ptrdiff_t sz = out->p.h * stride; |
133 | 111 | if (sz < 0) |
134 | 0 | memcpy((uint8_t*) out->data[0] + sz - stride, |
135 | 0 | (uint8_t*) in->data[0] + sz - stride, -sz); |
136 | 111 | else |
137 | 111 | memcpy(out->data[0], in->data[0], sz); |
138 | 111 | } |
139 | | |
140 | 515 | if (in->p.layout != DAV1D_PIXEL_LAYOUT_I400 && !data->chroma_scaling_from_luma) { |
141 | 82 | assert(out->stride[1] == in->stride[1]); |
142 | 82 | const int ss_ver = in->p.layout == DAV1D_PIXEL_LAYOUT_I420; |
143 | 82 | const ptrdiff_t stride = out->stride[1]; |
144 | 82 | const ptrdiff_t sz = ((out->p.h + ss_ver) >> ss_ver) * stride; |
145 | 82 | if (sz < 0) { |
146 | 0 | if (!data->num_uv_points[0]) |
147 | 0 | memcpy((uint8_t*) out->data[1] + sz - stride, |
148 | 0 | (uint8_t*) in->data[1] + sz - stride, -sz); |
149 | 0 | if (!data->num_uv_points[1]) |
150 | 0 | memcpy((uint8_t*) out->data[2] + sz - stride, |
151 | 0 | (uint8_t*) in->data[2] + sz - stride, -sz); |
152 | 82 | } else { |
153 | 82 | if (!data->num_uv_points[0]) |
154 | 31 | memcpy(out->data[1], in->data[1], sz); |
155 | 82 | if (!data->num_uv_points[1]) |
156 | 21 | memcpy(out->data[2], in->data[2], sz); |
157 | 82 | } |
158 | 82 | } |
159 | 515 | } Line | Count | Source | 105 | 275 | { | 106 | 275 | const Dav1dFilmGrainData *const data = &out->frame_hdr->film_grain.data; | 107 | | #if BITDEPTH != 8 | 108 | | const int bitdepth_max = (1 << out->p.bpc) - 1; | 109 | | #endif | 110 | | | 111 | | // Generate grain LUTs as needed | 112 | 275 | dsp->generate_grain_y(grain_lut[0], data HIGHBD_TAIL_SUFFIX); // always needed | 113 | 275 | if (data->num_uv_points[0] || data->chroma_scaling_from_luma) | 114 | 213 | dsp->generate_grain_uv[in->p.layout - 1](grain_lut[1], grain_lut[0], | 115 | 213 | data, 0 HIGHBD_TAIL_SUFFIX); | 116 | 275 | if (data->num_uv_points[1] || data->chroma_scaling_from_luma) | 117 | 216 | dsp->generate_grain_uv[in->p.layout - 1](grain_lut[2], grain_lut[0], | 118 | 216 | data, 1 HIGHBD_TAIL_SUFFIX); | 119 | | | 120 | | // Generate scaling LUTs as needed | 121 | 275 | if (data->num_y_points || data->chroma_scaling_from_luma) | 122 | 265 | generate_scaling(in->p.bpc, data->y_points, data->num_y_points, scaling[0]); | 123 | 275 | if (data->num_uv_points[0]) | 124 | 33 | generate_scaling(in->p.bpc, data->uv_points[0], data->num_uv_points[0], scaling[1]); | 125 | 275 | if (data->num_uv_points[1]) | 126 | 36 | generate_scaling(in->p.bpc, data->uv_points[1], data->num_uv_points[1], scaling[2]); | 127 | | | 128 | | // Copy over the non-modified planes | 129 | 275 | assert(out->stride[0] == in->stride[0]); | 130 | 275 | if (!data->num_y_points) { | 131 | 70 | const ptrdiff_t stride = out->stride[0]; | 132 | 70 | const ptrdiff_t sz = out->p.h * stride; | 133 | 70 | if (sz < 0) | 134 | 0 | memcpy((uint8_t*) out->data[0] + sz - stride, | 135 | 0 | (uint8_t*) in->data[0] + sz - stride, -sz); | 136 | 70 | else | 137 | 70 | memcpy(out->data[0], in->data[0], sz); | 138 | 70 | } | 139 | | | 140 | 275 | if (in->p.layout != DAV1D_PIXEL_LAYOUT_I400 && !data->chroma_scaling_from_luma) { | 141 | 47 | assert(out->stride[1] == in->stride[1]); | 142 | 47 | const int ss_ver = in->p.layout == DAV1D_PIXEL_LAYOUT_I420; | 143 | 47 | const ptrdiff_t stride = out->stride[1]; | 144 | 47 | const ptrdiff_t sz = ((out->p.h + ss_ver) >> ss_ver) * stride; | 145 | 47 | if (sz < 0) { | 146 | 0 | if (!data->num_uv_points[0]) | 147 | 0 | memcpy((uint8_t*) out->data[1] + sz - stride, | 148 | 0 | (uint8_t*) in->data[1] + sz - stride, -sz); | 149 | 0 | if (!data->num_uv_points[1]) | 150 | 0 | memcpy((uint8_t*) out->data[2] + sz - stride, | 151 | 0 | (uint8_t*) in->data[2] + sz - stride, -sz); | 152 | 47 | } else { | 153 | 47 | if (!data->num_uv_points[0]) | 154 | 14 | memcpy(out->data[1], in->data[1], sz); | 155 | 47 | if (!data->num_uv_points[1]) | 156 | 11 | memcpy(out->data[2], in->data[2], sz); | 157 | 47 | } | 158 | 47 | } | 159 | 275 | } |
Line | Count | Source | 105 | 240 | { | 106 | 240 | const Dav1dFilmGrainData *const data = &out->frame_hdr->film_grain.data; | 107 | 240 | #if BITDEPTH != 8 | 108 | 240 | const int bitdepth_max = (1 << out->p.bpc) - 1; | 109 | 240 | #endif | 110 | | | 111 | | // Generate grain LUTs as needed | 112 | 240 | dsp->generate_grain_y(grain_lut[0], data HIGHBD_TAIL_SUFFIX); // always needed | 113 | 240 | if (data->num_uv_points[0] || data->chroma_scaling_from_luma) | 114 | 192 | dsp->generate_grain_uv[in->p.layout - 1](grain_lut[1], grain_lut[0], | 115 | 192 | data, 0 HIGHBD_TAIL_SUFFIX); | 116 | 240 | if (data->num_uv_points[1] || data->chroma_scaling_from_luma) | 117 | 199 | dsp->generate_grain_uv[in->p.layout - 1](grain_lut[2], grain_lut[0], | 118 | 199 | data, 1 HIGHBD_TAIL_SUFFIX); | 119 | | | 120 | | // Generate scaling LUTs as needed | 121 | 240 | if (data->num_y_points || data->chroma_scaling_from_luma) | 122 | 230 | generate_scaling(in->p.bpc, data->y_points, data->num_y_points, scaling[0]); | 123 | 240 | if (data->num_uv_points[0]) | 124 | 18 | generate_scaling(in->p.bpc, data->uv_points[0], data->num_uv_points[0], scaling[1]); | 125 | 240 | if (data->num_uv_points[1]) | 126 | 25 | generate_scaling(in->p.bpc, data->uv_points[1], data->num_uv_points[1], scaling[2]); | 127 | | | 128 | | // Copy over the non-modified planes | 129 | 240 | assert(out->stride[0] == in->stride[0]); | 130 | 240 | if (!data->num_y_points) { | 131 | 41 | const ptrdiff_t stride = out->stride[0]; | 132 | 41 | const ptrdiff_t sz = out->p.h * stride; | 133 | 41 | if (sz < 0) | 134 | 0 | memcpy((uint8_t*) out->data[0] + sz - stride, | 135 | 0 | (uint8_t*) in->data[0] + sz - stride, -sz); | 136 | 41 | else | 137 | 41 | memcpy(out->data[0], in->data[0], sz); | 138 | 41 | } | 139 | | | 140 | 240 | if (in->p.layout != DAV1D_PIXEL_LAYOUT_I400 && !data->chroma_scaling_from_luma) { | 141 | 35 | assert(out->stride[1] == in->stride[1]); | 142 | 35 | const int ss_ver = in->p.layout == DAV1D_PIXEL_LAYOUT_I420; | 143 | 35 | const ptrdiff_t stride = out->stride[1]; | 144 | 35 | const ptrdiff_t sz = ((out->p.h + ss_ver) >> ss_ver) * stride; | 145 | 35 | if (sz < 0) { | 146 | 0 | if (!data->num_uv_points[0]) | 147 | 0 | memcpy((uint8_t*) out->data[1] + sz - stride, | 148 | 0 | (uint8_t*) in->data[1] + sz - stride, -sz); | 149 | 0 | if (!data->num_uv_points[1]) | 150 | 0 | memcpy((uint8_t*) out->data[2] + sz - stride, | 151 | 0 | (uint8_t*) in->data[2] + sz - stride, -sz); | 152 | 35 | } else { | 153 | 35 | if (!data->num_uv_points[0]) | 154 | 17 | memcpy(out->data[1], in->data[1], sz); | 155 | 35 | if (!data->num_uv_points[1]) | 156 | 10 | memcpy(out->data[2], in->data[2], sz); | 157 | 35 | } | 158 | 35 | } | 159 | 240 | } |
|
160 | | |
161 | | void bitfn(dav1d_apply_grain_row)(const Dav1dFilmGrainDSPContext *const dsp, |
162 | | Dav1dPicture *const out, |
163 | | const Dav1dPicture *const in, |
164 | | const uint8_t scaling[3][SCALING_SIZE], |
165 | | const entry grain_lut[3][GRAIN_HEIGHT+1][GRAIN_WIDTH], |
166 | | const int row) |
167 | 18.1k | { |
168 | | // Synthesize grain for the affected planes |
169 | 18.1k | const Dav1dFilmGrainData *const data = &out->frame_hdr->film_grain.data; |
170 | 18.1k | const int ss_y = in->p.layout == DAV1D_PIXEL_LAYOUT_I420; |
171 | 18.1k | const int ss_x = in->p.layout != DAV1D_PIXEL_LAYOUT_I444; |
172 | 18.1k | const int cpw = (out->p.w + ss_x) >> ss_x; |
173 | 18.1k | const int is_id = out->seq_hdr->mtrx == DAV1D_MC_IDENTITY; |
174 | 18.1k | pixel *const luma_src = |
175 | 18.1k | ((pixel *) in->data[0]) + row * FG_BLOCK_SIZE * PXSTRIDE(in->stride[0]); |
176 | | #if BITDEPTH != 8 |
177 | | const int bitdepth_max = (1 << out->p.bpc) - 1; |
178 | | #endif |
179 | | |
180 | 18.1k | if (data->num_y_points) { |
181 | 15.1k | const int bh = imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE); |
182 | 15.1k | dsp->fgy_32x32xn(((pixel *) out->data[0]) + row * FG_BLOCK_SIZE * PXSTRIDE(out->stride[0]), |
183 | 15.1k | luma_src, out->stride[0], data, |
184 | 15.1k | out->p.w, scaling[0], grain_lut[0], bh, row HIGHBD_TAIL_SUFFIX); |
185 | 15.1k | } |
186 | | |
187 | 18.1k | if (!data->num_uv_points[0] && !data->num_uv_points[1] && |
188 | 15.5k | !data->chroma_scaling_from_luma) |
189 | 1.61k | { |
190 | 1.61k | return; |
191 | 1.61k | } |
192 | | |
193 | 16.5k | const int bh = (imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE) + ss_y) >> ss_y; |
194 | | |
195 | | // extend padding pixels |
196 | 16.5k | if (out->p.w & ss_x) { |
197 | 938 | pixel *ptr = luma_src; |
198 | 15.7k | for (int y = 0; y < bh; y++) { |
199 | 14.7k | ptr[out->p.w] = ptr[out->p.w - 1]; |
200 | 14.7k | ptr += PXSTRIDE(in->stride[0]) << ss_y; |
201 | 14.7k | } |
202 | 938 | } |
203 | | |
204 | 16.5k | const ptrdiff_t uv_off = row * FG_BLOCK_SIZE * PXSTRIDE(out->stride[1]) >> ss_y; |
205 | 16.5k | if (data->chroma_scaling_from_luma) { |
206 | 41.6k | for (int pl = 0; pl < 2; pl++) |
207 | 27.6k | dsp->fguv_32x32xn[in->p.layout - 1](((pixel *) out->data[1 + pl]) + uv_off, |
208 | 27.6k | ((const pixel *) in->data[1 + pl]) + uv_off, |
209 | 27.6k | in->stride[1], data, cpw, |
210 | 27.6k | scaling[0], grain_lut[1 + pl], |
211 | 27.6k | bh, row, luma_src, in->stride[0], |
212 | 27.6k | pl, is_id HIGHBD_TAIL_SUFFIX); |
213 | 13.9k | } else { |
214 | 7.34k | for (int pl = 0; pl < 2; pl++) |
215 | 4.73k | if (data->num_uv_points[pl]) |
216 | 2.92k | dsp->fguv_32x32xn[in->p.layout - 1](((pixel *) out->data[1 + pl]) + uv_off, |
217 | 2.92k | ((const pixel *) in->data[1 + pl]) + uv_off, |
218 | 2.92k | in->stride[1], data, cpw, |
219 | 2.92k | scaling[1 + pl], grain_lut[1 + pl], |
220 | 2.92k | bh, row, luma_src, in->stride[0], |
221 | 2.92k | pl, is_id HIGHBD_TAIL_SUFFIX); |
222 | 2.61k | } |
223 | 16.5k | } dav1d_apply_grain_row_8bpc Line | Count | Source | 167 | 13.6k | { | 168 | | // Synthesize grain for the affected planes | 169 | 13.6k | const Dav1dFilmGrainData *const data = &out->frame_hdr->film_grain.data; | 170 | 13.6k | const int ss_y = in->p.layout == DAV1D_PIXEL_LAYOUT_I420; | 171 | 13.6k | const int ss_x = in->p.layout != DAV1D_PIXEL_LAYOUT_I444; | 172 | 13.6k | const int cpw = (out->p.w + ss_x) >> ss_x; | 173 | 13.6k | const int is_id = out->seq_hdr->mtrx == DAV1D_MC_IDENTITY; | 174 | 13.6k | pixel *const luma_src = | 175 | 13.6k | ((pixel *) in->data[0]) + row * FG_BLOCK_SIZE * PXSTRIDE(in->stride[0]); | 176 | | #if BITDEPTH != 8 | 177 | | const int bitdepth_max = (1 << out->p.bpc) - 1; | 178 | | #endif | 179 | | | 180 | 13.6k | if (data->num_y_points) { | 181 | 11.5k | const int bh = imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE); | 182 | 11.5k | dsp->fgy_32x32xn(((pixel *) out->data[0]) + row * FG_BLOCK_SIZE * PXSTRIDE(out->stride[0]), | 183 | 11.5k | luma_src, out->stride[0], data, | 184 | 11.5k | out->p.w, scaling[0], grain_lut[0], bh, row HIGHBD_TAIL_SUFFIX); | 185 | 11.5k | } | 186 | | | 187 | 13.6k | if (!data->num_uv_points[0] && !data->num_uv_points[1] && | 188 | 12.1k | !data->chroma_scaling_from_luma) | 189 | 1.07k | { | 190 | 1.07k | return; | 191 | 1.07k | } | 192 | | | 193 | 12.5k | const int bh = (imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE) + ss_y) >> ss_y; | 194 | | | 195 | | // extend padding pixels | 196 | 12.5k | if (out->p.w & ss_x) { | 197 | 589 | pixel *ptr = luma_src; | 198 | 9.88k | for (int y = 0; y < bh; y++) { | 199 | 9.29k | ptr[out->p.w] = ptr[out->p.w - 1]; | 200 | 9.29k | ptr += PXSTRIDE(in->stride[0]) << ss_y; | 201 | 9.29k | } | 202 | 589 | } | 203 | | | 204 | 12.5k | const ptrdiff_t uv_off = row * FG_BLOCK_SIZE * PXSTRIDE(out->stride[1]) >> ss_y; | 205 | 12.5k | if (data->chroma_scaling_from_luma) { | 206 | 32.9k | for (int pl = 0; pl < 2; pl++) | 207 | 21.8k | dsp->fguv_32x32xn[in->p.layout - 1](((pixel *) out->data[1 + pl]) + uv_off, | 208 | 21.8k | ((const pixel *) in->data[1 + pl]) + uv_off, | 209 | 21.8k | in->stride[1], data, cpw, | 210 | 21.8k | scaling[0], grain_lut[1 + pl], | 211 | 21.8k | bh, row, luma_src, in->stride[0], | 212 | 21.8k | pl, is_id HIGHBD_TAIL_SUFFIX); | 213 | 11.0k | } else { | 214 | 3.94k | for (int pl = 0; pl < 2; pl++) | 215 | 2.47k | if (data->num_uv_points[pl]) | 216 | 1.42k | dsp->fguv_32x32xn[in->p.layout - 1](((pixel *) out->data[1 + pl]) + uv_off, | 217 | 1.42k | ((const pixel *) in->data[1 + pl]) + uv_off, | 218 | 1.42k | in->stride[1], data, cpw, | 219 | 1.42k | scaling[1 + pl], grain_lut[1 + pl], | 220 | 1.42k | bh, row, luma_src, in->stride[0], | 221 | 1.42k | pl, is_id HIGHBD_TAIL_SUFFIX); | 222 | 1.47k | } | 223 | 12.5k | } |
dav1d_apply_grain_row_16bpc Line | Count | Source | 167 | 4.58k | { | 168 | | // Synthesize grain for the affected planes | 169 | 4.58k | const Dav1dFilmGrainData *const data = &out->frame_hdr->film_grain.data; | 170 | 4.58k | const int ss_y = in->p.layout == DAV1D_PIXEL_LAYOUT_I420; | 171 | 4.58k | const int ss_x = in->p.layout != DAV1D_PIXEL_LAYOUT_I444; | 172 | 4.58k | const int cpw = (out->p.w + ss_x) >> ss_x; | 173 | 4.58k | const int is_id = out->seq_hdr->mtrx == DAV1D_MC_IDENTITY; | 174 | 4.58k | pixel *const luma_src = | 175 | 4.58k | ((pixel *) in->data[0]) + row * FG_BLOCK_SIZE * PXSTRIDE(in->stride[0]); | 176 | 4.58k | #if BITDEPTH != 8 | 177 | 4.58k | const int bitdepth_max = (1 << out->p.bpc) - 1; | 178 | 4.58k | #endif | 179 | | | 180 | 4.58k | if (data->num_y_points) { | 181 | 3.60k | const int bh = imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE); | 182 | 3.60k | dsp->fgy_32x32xn(((pixel *) out->data[0]) + row * FG_BLOCK_SIZE * PXSTRIDE(out->stride[0]), | 183 | 3.60k | luma_src, out->stride[0], data, | 184 | 3.60k | out->p.w, scaling[0], grain_lut[0], bh, row HIGHBD_TAIL_SUFFIX); | 185 | 3.60k | } | 186 | | | 187 | 4.58k | if (!data->num_uv_points[0] && !data->num_uv_points[1] && | 188 | 3.44k | !data->chroma_scaling_from_luma) | 189 | 541 | { | 190 | 541 | return; | 191 | 541 | } | 192 | | | 193 | 4.04k | const int bh = (imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE) + ss_y) >> ss_y; | 194 | | | 195 | | // extend padding pixels | 196 | 4.04k | if (out->p.w & ss_x) { | 197 | 349 | pixel *ptr = luma_src; | 198 | 5.84k | for (int y = 0; y < bh; y++) { | 199 | 5.49k | ptr[out->p.w] = ptr[out->p.w - 1]; | 200 | 5.49k | ptr += PXSTRIDE(in->stride[0]) << ss_y; | 201 | 5.49k | } | 202 | 349 | } | 203 | | | 204 | 4.04k | const ptrdiff_t uv_off = row * FG_BLOCK_SIZE * PXSTRIDE(out->stride[1]) >> ss_y; | 205 | 4.04k | if (data->chroma_scaling_from_luma) { | 206 | 8.71k | for (int pl = 0; pl < 2; pl++) | 207 | 5.81k | dsp->fguv_32x32xn[in->p.layout - 1](((pixel *) out->data[1 + pl]) + uv_off, | 208 | 5.81k | ((const pixel *) in->data[1 + pl]) + uv_off, | 209 | 5.81k | in->stride[1], data, cpw, | 210 | 5.81k | scaling[0], grain_lut[1 + pl], | 211 | 5.81k | bh, row, luma_src, in->stride[0], | 212 | 5.81k | pl, is_id HIGHBD_TAIL_SUFFIX); | 213 | 2.90k | } else { | 214 | 3.39k | for (int pl = 0; pl < 2; pl++) | 215 | 2.26k | if (data->num_uv_points[pl]) | 216 | 1.50k | dsp->fguv_32x32xn[in->p.layout - 1](((pixel *) out->data[1 + pl]) + uv_off, | 217 | 1.50k | ((const pixel *) in->data[1 + pl]) + uv_off, | 218 | 1.50k | in->stride[1], data, cpw, | 219 | 1.50k | scaling[1 + pl], grain_lut[1 + pl], | 220 | 1.50k | bh, row, luma_src, in->stride[0], | 221 | 1.50k | pl, is_id HIGHBD_TAIL_SUFFIX); | 222 | 1.13k | } | 223 | 4.04k | } |
|
224 | | |
225 | | void bitfn(dav1d_apply_grain)(const Dav1dFilmGrainDSPContext *const dsp, |
226 | | Dav1dPicture *const out, |
227 | | const Dav1dPicture *const in) |
228 | 418 | { |
229 | 418 | ALIGN_STK_16(entry, grain_lut, 3,[GRAIN_HEIGHT + 1][GRAIN_WIDTH]); |
230 | | #if ARCH_X86_64 && BITDEPTH == 8 |
231 | 216 | ALIGN_STK_64(uint8_t, scaling, 3,[SCALING_SIZE]); |
232 | | #else |
233 | | uint8_t scaling[3][SCALING_SIZE]; |
234 | | #endif |
235 | 418 | const int rows = (out->p.h + FG_BLOCK_SIZE - 1) / FG_BLOCK_SIZE; |
236 | | |
237 | 418 | bitfn(dav1d_prep_grain)(dsp, out, in, scaling, grain_lut); |
238 | 11.4k | for (int row = 0; row < rows; row++) |
239 | 11.0k | bitfn(dav1d_apply_grain_row)(dsp, out, in, scaling, grain_lut, row); |
240 | 418 | } Line | Count | Source | 228 | 216 | { | 229 | 216 | ALIGN_STK_16(entry, grain_lut, 3,[GRAIN_HEIGHT + 1][GRAIN_WIDTH]); | 230 | 216 | #if ARCH_X86_64 && BITDEPTH == 8 | 231 | 216 | ALIGN_STK_64(uint8_t, scaling, 3,[SCALING_SIZE]); | 232 | | #else | 233 | | uint8_t scaling[3][SCALING_SIZE]; | 234 | | #endif | 235 | 216 | const int rows = (out->p.h + FG_BLOCK_SIZE - 1) / FG_BLOCK_SIZE; | 236 | | | 237 | 216 | bitfn(dav1d_prep_grain)(dsp, out, in, scaling, grain_lut); | 238 | 7.27k | for (int row = 0; row < rows; row++) | 239 | 7.06k | bitfn(dav1d_apply_grain_row)(dsp, out, in, scaling, grain_lut, row); | 240 | 216 | } |
Line | Count | Source | 228 | 202 | { | 229 | 202 | ALIGN_STK_16(entry, grain_lut, 3,[GRAIN_HEIGHT + 1][GRAIN_WIDTH]); | 230 | | #if ARCH_X86_64 && BITDEPTH == 8 | 231 | | ALIGN_STK_64(uint8_t, scaling, 3,[SCALING_SIZE]); | 232 | | #else | 233 | 202 | uint8_t scaling[3][SCALING_SIZE]; | 234 | 202 | #endif | 235 | 202 | const int rows = (out->p.h + FG_BLOCK_SIZE - 1) / FG_BLOCK_SIZE; | 236 | | | 237 | 202 | bitfn(dav1d_prep_grain)(dsp, out, in, scaling, grain_lut); | 238 | 4.16k | for (int row = 0; row < rows; row++) | 239 | 3.96k | bitfn(dav1d_apply_grain_row)(dsp, out, in, scaling, grain_lut, row); | 240 | 202 | } |
|
241 | | #endif |