/src/vvdec/source/Lib/FilmGrain/FilmGrainImpl.cpp
Line | Count | Source |
1 | | /* ----------------------------------------------------------------------------- |
2 | | The copyright in this software is being made available under the Clear BSD |
3 | | License, included below. No patent rights, trademark rights and/or |
4 | | other Intellectual Property Rights other than the copyrights concerning |
5 | | the Software are granted under this license. |
6 | | |
7 | | The Clear BSD License |
8 | | |
9 | | Copyright (c) 2018-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVdeC Authors. |
10 | | All rights reserved. |
11 | | |
12 | | Redistribution and use in source and binary forms, with or without modification, |
13 | | are permitted (subject to the limitations in the disclaimer below) provided that |
14 | | the following conditions are met: |
15 | | |
16 | | * Redistributions of source code must retain the above copyright notice, |
17 | | this list of conditions and the following disclaimer. |
18 | | |
19 | | * Redistributions in binary form must reproduce the above copyright |
20 | | notice, this list of conditions and the following disclaimer in the |
21 | | documentation and/or other materials provided with the distribution. |
22 | | |
23 | | * Neither the name of the copyright holder nor the names of its |
24 | | contributors may be used to endorse or promote products derived from this |
25 | | software without specific prior written permission. |
26 | | |
27 | | NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY |
28 | | THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
29 | | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
30 | | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A |
31 | | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR |
32 | | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
33 | | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
34 | | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
35 | | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER |
36 | | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
37 | | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
38 | | POSSIBILITY OF SUCH DAMAGE. |
39 | | |
40 | | |
41 | | ------------------------------------------------------------------------------------------- */ |
42 | | |
43 | | /* This file is based on VFGS, available on |
44 | | * https://github.com/InterDigitalInc/VersatileFilmGrain |
45 | | * |
46 | | * VFGS implements film grain synthesis as a hardware model: it simulates the |
47 | | * output of a cost-effective hardware implementation in a video display |
48 | | * pipeline. Also, the C code is split into "fw" (firmware) and "hw" (hardware) |
49 | | * parts, and as self-explanatory as possible. See VFGS github repository for |
50 | | * more details. |
51 | | * |
52 | | * The VFGS github repository also contains other tools to experiment with film |
53 | | * grain synthesis (e.g. a graphical display and tuning tool for FGC SEI |
54 | | * message). |
55 | | */ |
56 | | |
57 | | #include "FilmGrainImpl.h" |
58 | | |
59 | | #include <cstring> // memcpy |
60 | | #include <algorithm> |
61 | | |
62 | | #include <CommonDef.h> |
63 | | |
64 | | namespace vvdec |
65 | | { |
66 | | |
67 | | /** Derive Y x/y offsets from (random) number |
68 | | * |
69 | | * Bit fields are designed to minimize overlaps across color channels, to |
70 | | * decorrelate them as much as possible. |
71 | | * |
72 | | * 10-bit for 12 or 13 bins makes a reasonably uniform distribution (1.2% |
73 | | * probability error). |
74 | | * |
75 | | * If 8-bit is requested to further simplify the multiplier, at the cost of less |
76 | | * uniform probability, the following bitfields can be considered: |
77 | | * |
78 | | * Y: sign = rnd[31], x = (rnd[7:0]*13 >> 8)*4, y = (rnd[21:14]*12 >> 8)*4 |
79 | | * U: sign = rnd[0], x = (rnd[17:10]*13 >> 8)*2, y = (rnd[31:24]*12 >> 8)*2 |
80 | | * V: sign = rnd[13], x = (rnd[27:20]*13 >> 8)*2, y = (rnd[11:4]*12 >> 8)*2 |
81 | | * |
82 | | * Note: to fully support cross-component correlation within patterns, we would |
83 | | * need to align luma/chroma offsets. |
84 | | */ |
85 | | void FilmGrainImpl::get_offset_y( uint32_t val, int* s, uint8_t* x, uint8_t* y ) |
86 | 0 | { |
87 | 0 | uint32_t bf; // bit field |
88 | |
|
89 | 0 | *s = ( ( val >> 31 ) & 1 ) ? -1 : 1; |
90 | |
|
91 | 0 | bf = ( val >> 0 ) & 0x3ff; |
92 | 0 | *x = ( ( bf * 13 ) >> 10 ) * 4; // 13 = 8 + 4 + 1 (two adders) |
93 | |
|
94 | 0 | bf = ( val >> 14 ) & 0x3ff; |
95 | 0 | *y = ( ( bf * 12 ) >> 10 ) * 4; // 12 = 8 + 4 (one adder) |
96 | | // Note: could shift 9 and * 2, to make a multiple of 2 and make use of all |
97 | | // pattern samples (when using overlap). |
98 | 0 | } |
99 | | |
100 | | void FilmGrainImpl::get_offset_u( uint32_t val, int* s, uint8_t* x, uint8_t* y ) const |
101 | 0 | { |
102 | 0 | uint32_t bf; // bit field |
103 | |
|
104 | 0 | *s = ( ( val >> 2 ) & 1 ) ? -1 : 1; |
105 | |
|
106 | 0 | bf = ( val >> 10 ) & 0x3ff; |
107 | 0 | *x = ( ( bf * 13 ) >> 10 ) * ( 4 / csubx ); |
108 | |
|
109 | 0 | bf = ( ( val >> 24 ) & 0x0ff ) | ( ( val << 8 ) & 0x300 ); |
110 | 0 | *y = ( ( bf * 12 ) >> 10 ) * ( 4 / csuby ); |
111 | 0 | } |
112 | | |
113 | | void FilmGrainImpl::get_offset_v( uint32_t val, int* s, uint8_t* x, uint8_t* y ) const |
114 | 0 | { |
115 | 0 | uint32_t bf; // bit field |
116 | |
|
117 | 0 | *s = ( ( val >> 15 ) & 1 ) ? -1 : 1; |
118 | |
|
119 | 0 | bf = ( val >> 20 ) & 0x3ff; |
120 | 0 | *x = ( ( bf * 13 ) >> 10 ) * ( 4 / csubx ); |
121 | |
|
122 | 0 | bf = ( val >> 4 ) & 0x3ff; |
123 | 0 | *y = ( ( bf * 12 ) >> 10 ) * ( 4 / csuby ); |
124 | 0 | } |
125 | | |
126 | | void FilmGrainImpl::add_grain_block( void* I, int c, int x, int y, int width, uint32_t rnd, uint32_t rnd_up, int16_t grain[3][32], uint8_t scale[3][32] ) const |
127 | 0 | { |
128 | 0 | const int subx = c ? csubx : 1; |
129 | 0 | const int suby = c ? csuby : 1; |
130 | |
|
131 | 0 | if( ( y & 1 ) && suby > 1 ) |
132 | 0 | { |
133 | 0 | return; |
134 | 0 | } |
135 | | |
136 | 0 | CHECK( x & 15, "x not a multiple of 16" ); |
137 | 0 | CHECK( width <= 128, "wrong width" ); |
138 | 0 | CHECK( bs != 0 && bs != 2, "wrong bs" ); |
139 | 0 | CHECK( scale_shift + bs < 8 || scale_shift + bs > 13, "wrong scale_shift" ); |
140 | | |
141 | | // TODO: assert subx, suby, Y/C min/max, max pLUT values, etc |
142 | |
|
143 | 0 | const int j = y & 0xf; |
144 | |
|
145 | 0 | uint8_t oc1, oc2; // overlapping coefficients |
146 | 0 | if( y > 15 && j == 0 ) // first line of overlap |
147 | 0 | { |
148 | 0 | oc1 = ( suby > 1 ) ? 20 : 12; // current |
149 | 0 | oc2 = ( suby > 1 ) ? 20 : 24; // upper |
150 | 0 | } |
151 | 0 | else if( y > 15 && j == 1 ) // second line of overlap |
152 | 0 | { |
153 | 0 | oc1 = 24; |
154 | 0 | oc2 = 12; |
155 | 0 | } |
156 | 0 | else |
157 | 0 | { |
158 | 0 | oc1 = oc2 = 0; |
159 | 0 | } |
160 | | |
161 | | // Derive block offsets + sign |
162 | 0 | int s; // random sign flip (current) |
163 | 0 | uint8_t ox, oy; // random offset (current) |
164 | 0 | if( c == 0 ) |
165 | 0 | { |
166 | 0 | get_offset_y( rnd, &s, &ox, &oy ); |
167 | 0 | } |
168 | 0 | else if( c == 1 ) |
169 | 0 | { |
170 | 0 | get_offset_u( rnd, &s, &ox, &oy ); |
171 | 0 | } |
172 | 0 | else |
173 | 0 | { |
174 | 0 | get_offset_v( rnd, &s, &ox, &oy ); |
175 | 0 | } |
176 | 0 | oy += j / suby; |
177 | | |
178 | | // Same for upper block (overlap) |
179 | 0 | int s_up; // random sign flip (upper row) |
180 | 0 | uint8_t ox_up, oy_up; // random offset (upper row) |
181 | 0 | if( c == 0 ) |
182 | 0 | { |
183 | 0 | get_offset_y( rnd_up, &s_up, &ox_up, &oy_up ); |
184 | 0 | } |
185 | 0 | else if( c == 1 ) |
186 | 0 | { |
187 | 0 | get_offset_u( rnd_up, &s_up, &ox_up, &oy_up ); |
188 | 0 | } |
189 | 0 | else |
190 | 0 | { |
191 | 0 | get_offset_v( rnd_up, &s_up, &ox_up, &oy_up ); |
192 | 0 | } |
193 | 0 | oy_up += ( 16 + j ) / suby; |
194 | | |
195 | | // Make grain pattern |
196 | 0 | make_grain_pattern( I, c, x, subx, oc1, oc2, ox, ox_up, oy, oy_up, s, s_up, grain, scale ); |
197 | | |
198 | | // Scale & output |
199 | 0 | scale_and_output( I, c, x, subx, width, grain, scale ); |
200 | 0 | } |
201 | | |
202 | | void FilmGrainImpl::make_grain_pattern( const void* I, |
203 | | int c, |
204 | | int x, |
205 | | int subx, |
206 | | uint8_t oc1, |
207 | | uint8_t oc2, |
208 | | uint8_t ox, |
209 | | uint8_t ox_up, |
210 | | uint8_t oy, |
211 | | uint8_t oy_up, |
212 | | int s, |
213 | | int s_up, |
214 | | int16_t grain[3][32], |
215 | | uint8_t scale[3][32] ) const |
216 | 0 | { |
217 | 0 | const uint8_t* I8 = (const uint8_t*) I; |
218 | 0 | const uint16_t* I16 = (const uint16_t*) I; |
219 | 0 | { |
220 | 0 | for( int i = 0; i < 16 / subx; i++ ) |
221 | 0 | { |
222 | 0 | uint8_t intensity = bs ? I16[x / subx + i] >> bs : I8[x / subx + i]; |
223 | 0 | uint8_t pi = pLUT[c][intensity] >> 4; // pattern index (integer part) |
224 | 0 | int P = pattern[c ? 1 : 0][pi][oy][ox + i] * s; // Pattern sample (from current pattern index) |
225 | | // We could consider just XORing the sign bit |
226 | | #if PATTERN_INTERPOLATION |
227 | | uint8_t pf = pLUT[c][intensity] & 15; // pattern index fractional part (interpolate with next) -- could restrict to less bits (e.g. 2) |
228 | | int Pn = |
229 | | pattern[c ? 1 : 0][pi + 1][oy][ox + i] * s; // Next-pattern sample (from pattern index+1) |
230 | | // But there are equivalent hw tricks, e.g. storing values as sign + amplitude instead of two's complement |
231 | | #endif |
232 | |
|
233 | 0 | if( oc1 ) // overlap |
234 | 0 | { |
235 | 0 | P = round( P * oc1 + pattern[c ? 1 : 0][pi][oy_up][ox_up + i] * oc2 * s_up, 5 ); |
236 | | #if PATTERN_INTERPOLATION |
237 | | Pn = round( Pn * oc1 + pattern[c ? 1 : 0][pi + 1][oy_up][ox_up + i] * oc2 * s_up, 5 ); |
238 | | #endif |
239 | 0 | } |
240 | | #if PATTERN_INTERPOLATION |
241 | | // Pattern interpolation: P is current, Pn is next, pf is interpolation coefficient |
242 | | grain[c][16 / subx + i] = round( P * ( 16 - pf ) + Pn * pf, 4 ); |
243 | | #else |
244 | 0 | grain[c][16 / subx + i] = P; |
245 | 0 | #endif |
246 | | // Scale sign already integrated above because of overlap |
247 | 0 | scale[c][16 / subx + i] = sLUT[c][intensity]; |
248 | 0 | } |
249 | 0 | } |
250 | 0 | } |
251 | | |
252 | | void FilmGrainImpl::scale_and_output( void* I, int c, int x, int subx, int width, int16_t grain[3][32], uint8_t scale[3][32] ) const |
253 | 0 | { |
254 | 0 | uint8_t* I8 = (uint8_t*) I; |
255 | 0 | uint16_t* I16 = (uint16_t*) I; |
256 | |
|
257 | 0 | const uint8_t I_min = c ? C_min : Y_min; |
258 | 0 | const uint8_t I_max = c ? C_max : Y_max; |
259 | |
|
260 | 0 | int flush = 0; |
261 | 0 | do |
262 | 0 | { |
263 | 0 | if( x > 0 ) |
264 | 0 | { |
265 | 0 | if( !flush ) |
266 | 0 | { |
267 | | // Horizontal deblock (across previous block) |
268 | 0 | int16_t l1, l0, r0, r1; |
269 | |
|
270 | 0 | l1 = grain[c][16 / subx - 2]; |
271 | 0 | l0 = grain[c][16 / subx - 1]; |
272 | 0 | r0 = grain[c][16 / subx + 0]; |
273 | 0 | r1 = grain[c][16 / subx + 1]; |
274 | |
|
275 | 0 | grain[c][16 / subx - 1] = round( l1 + 3 * l0 + r0, 2 ); |
276 | 0 | grain[c][16 / subx + 0] = round( l0 + 3 * r0 + r1, 2 ); |
277 | 0 | } |
278 | 0 | { |
279 | 0 | for( int i = 0; i < 16 / subx; i++ ) |
280 | 0 | { |
281 | | // Output previous block (or flush current) |
282 | 0 | int32_t g = round( scale[c][i] * (int16_t) grain[c][i], scale_shift ); |
283 | 0 | if( bs ) |
284 | 0 | { |
285 | 0 | I16[( x - 16 ) / subx + i] = std::max<int32_t>( I_min << bs, std::min<int32_t>( I_max << bs, I16[( x - 16 ) / subx + i] + g ) ); |
286 | 0 | } |
287 | 0 | else |
288 | 0 | { |
289 | 0 | I8[( x - 16 ) / subx + i] = std::max<int32_t>( I_min, std::min<int32_t>( I_max, I8[( x - 16 ) / subx + i] + g ) ); |
290 | 0 | } |
291 | 0 | } |
292 | 0 | } |
293 | 0 | } |
294 | | |
295 | | // Shift pipeline |
296 | 0 | if( !flush ) |
297 | 0 | { |
298 | 0 | if( c == 0 ) |
299 | 0 | { |
300 | 0 | for( int i = 0; i < 16; i++ ) |
301 | 0 | { |
302 | 0 | grain[0][i] = grain[0][i + 16]; |
303 | 0 | scale[0][i] = scale[0][i + 16]; |
304 | 0 | } |
305 | 0 | } |
306 | 0 | else |
307 | 0 | { |
308 | 0 | for( int i = 0; i < 8; i++ ) |
309 | 0 | { |
310 | 0 | grain[c][i] = grain[c][i + 8]; |
311 | 0 | scale[c][i] = scale[c][i + 8]; |
312 | 0 | } |
313 | 0 | } |
314 | 0 | } |
315 | |
|
316 | 0 | if( x + 16 >= width ) |
317 | 0 | { |
318 | 0 | flush++; |
319 | 0 | x += 16; |
320 | 0 | } |
321 | 0 | } while( flush == 1 ); |
322 | 0 | } |
323 | | |
324 | | /* Public interface ***********************************************************/ |
325 | | |
326 | | void FilmGrainImpl::set_luma_pattern( int index, int8_t* P ) |
327 | 0 | { |
328 | 0 | CHECK( index < 0 || index >= 8, "luma pattern index out of bounds" ); |
329 | 0 | memcpy( pattern[0][index], P, 64 * 64 ); |
330 | 0 | } |
331 | | |
332 | | void FilmGrainImpl::set_chroma_pattern( int index, int8_t* P ) |
333 | 0 | { |
334 | 0 | CHECK( index < 0 || index >= 8, "chroma pattern index out of bounds" ); |
335 | 0 | for( int i = 0; i < 64 / csuby; i++ ) |
336 | 0 | { |
337 | 0 | memcpy( pattern[1][index][i], P + ( 64 / csuby ) * i, 64 / csubx ); |
338 | 0 | } |
339 | 0 | } |
340 | | |
341 | | void FilmGrainImpl::set_scale_lut( int c, uint8_t lut[] ) |
342 | 0 | { |
343 | 0 | CHECK( c < 0 || c >= 3, "scale lut idx out of bounds" ); |
344 | 0 | memcpy( sLUT[c], lut, 256 ); |
345 | 0 | } |
346 | | |
347 | | void FilmGrainImpl::set_pattern_lut( int c, uint8_t lut[], bool all0 ) |
348 | 0 | { |
349 | 0 | CHECK( c < 0 || c >= 3, "pattern lut idx out of bounds" ); |
350 | 0 | allZero[c] = all0; |
351 | 0 | memcpy( pLUT[c], lut, 256 ); |
352 | 0 | } |
353 | | |
354 | | void FilmGrainImpl::set_scale_shift( int shift ) |
355 | 0 | { |
356 | 0 | CHECK( shift < 2 || shift >= 8, "scale shift out of range" ); |
357 | 0 | scale_shift = shift + 6 - bs; |
358 | 0 | } |
359 | | |
360 | | void FilmGrainImpl::set_depth( int depth ) |
361 | 0 | { |
362 | 0 | CHECK( depth != 8 && depth != 10, "only bit depth 8 and 10 supported." ) |
363 | | |
364 | 0 | if( bs == 0 && depth > 8 ) |
365 | 0 | { |
366 | 0 | scale_shift -= 2; |
367 | 0 | } |
368 | 0 | if( bs == 2 && depth == 8 ) |
369 | 0 | { |
370 | 0 | scale_shift += 2; |
371 | 0 | } |
372 | |
|
373 | 0 | bs = depth - 8; |
374 | 0 | } |
375 | | |
376 | | void FilmGrainImpl::set_chroma_subsampling( int subx, int suby ) |
377 | 0 | { |
378 | 0 | CHECK( subx != 1 && subx != 2, "chroma subsampling should be 1 or 2" ); |
379 | 0 | CHECK( suby != 1 && suby != 2, "chroma subsampling should be 1 or 2" ); |
380 | 0 | csubx = subx; |
381 | 0 | csuby = suby; |
382 | 0 | } |
383 | | |
384 | | FilmGrainImpl::FilmGrainImpl() |
385 | 0 | { |
386 | 0 | memset( pattern, 0, sizeof( pattern ) ); |
387 | 0 | memset( sLUT, 0, sizeof( sLUT ) ); |
388 | 0 | memset( pLUT, 0, sizeof( pLUT ) ); |
389 | 0 | } |
390 | | |
391 | | } // namespace vvdec |