/work/svt-av1/Source/Lib/Codec/grainSynthesis.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2016, Alliance for Open Media. All rights reserved |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license. |
10 | | */ |
11 | | |
12 | | /*!\file |
13 | | * \brief Describes film grain parameters and film grain synthesis |
14 | | * |
15 | | */ |
16 | | |
17 | | #include <stdio.h> |
18 | | #include <string.h> |
19 | | #include <stdlib.h> |
20 | | #include "grainSynthesis.h" |
21 | | #include "common_dsp_rtcd.h" |
22 | | #include "definitions.h" |
23 | | #include "svt_log.h" |
24 | | |
25 | | // Samples with Gaussian distribution in the range of [-2048, 2047] (12 bits) |
26 | | // with zero mean and standard deviation of about 512. |
27 | | // Each sample should be divided by 4 for the 10-bit range and by 16 for the 8-bit range. |
28 | | static const int32_t gaussian_sequence[2048] = { |
29 | | 56, 568, -180, 172, 124, -84, 172, -64, -900, 24, 820, 224, 1248, 996, 272, -8, |
30 | | -916, -388, -732, -104, -188, 800, 112, -652, -320, -376, 140, -252, 492, -168, 44, -788, |
31 | | 588, -584, 500, -228, 12, 680, 272, -476, 972, -100, 652, 368, 432, -196, -720, -192, |
32 | | 1000, -332, 652, -136, -552, -604, -4, 192, -220, -136, 1000, -52, 372, -96, -624, 124, |
33 | | -24, 396, 540, -12, -104, 640, 464, 244, -208, -84, 368, -528, -740, 248, -968, -848, |
34 | | 608, 376, -60, -292, -40, -156, 252, -292, 248, 224, -280, 400, -244, 244, -60, 76, |
35 | | -80, 212, 532, 340, 128, -36, 824, -352, -60, -264, -96, -612, 416, -704, 220, -204, |
36 | | 640, -160, 1220, -408, 900, 336, 20, -336, -96, -792, 304, 48, -28, -1232, -1172, -448, |
37 | | 104, -292, -520, 244, 60, -948, 0, -708, 268, 108, 356, -548, 488, -344, -136, 488, |
38 | | -196, -224, 656, -236, -1128, 60, 4, 140, 276, -676, -376, 168, -108, 464, 8, 564, |
39 | | 64, 240, 308, -300, -400, -456, -136, 56, 120, -408, -116, 436, 504, -232, 328, 844, |
40 | | -164, -84, 784, -168, 232, -224, 348, -376, 128, 568, 96, -1244, -288, 276, 848, 832, |
41 | | -360, 656, 464, -384, -332, -356, 728, -388, 160, -192, 468, 296, 224, 140, -776, -100, |
42 | | 280, 4, 196, 44, -36, -648, 932, 16, 1428, 28, 528, 808, 772, 20, 268, 88, |
43 | | -332, -284, 124, -384, -448, 208, -228, -1044, -328, 660, 380, -148, -300, 588, 240, 540, |
44 | | 28, 136, -88, -436, 256, 296, -1000, 1400, 0, -48, 1056, -136, 264, -528, -1108, 632, |
45 | | -484, -592, -344, 796, 124, -668, -768, 388, 1296, -232, -188, -200, -288, -4, 308, 100, |
46 | | -168, 256, -500, 204, -508, 648, -136, 372, -272, -120, -1004, -552, -548, -384, 548, -296, |
47 | | 428, -108, -8, -912, -324, -224, -88, -112, -220, -100, 996, -796, 548, 360, -216, 180, |
48 | | 428, -200, -212, 148, 96, 148, 284, 216, -412, -320, 120, -300, -384, -604, -572, -332, |
49 | | -8, -180, -176, 696, 116, -88, 628, 76, 44, -516, 240, -208, -40, 100, -592, 344, |
50 | | -308, -452, -228, 20, 916, -1752, -136, -340, -804, 140, 40, 512, 340, 248, 184, -492, |
51 | | 896, -156, 932, -628, 328, -688, -448, -616, -752, -100, 560, -1020, 180, -800, -64, 76, |
52 | | 576, 1068, 396, 660, 552, -108, -28, 320, -628, 312, -92, -92, -472, 268, 16, 560, |
53 | | 516, -672, -52, 492, -100, 260, 384, 284, 292, 304, -148, 88, -152, 1012, 1064, -228, |
54 | | 164, -376, -684, 592, -392, 156, 196, -524, -64, -884, 160, -176, 636, 648, 404, -396, |
55 | | -436, 864, 424, -728, 988, -604, 904, -592, 296, -224, 536, -176, -920, 436, -48, 1176, |
56 | | -884, 416, -776, -824, -884, 524, -548, -564, -68, -164, -96, 692, 364, -692, -1012, -68, |
57 | | 260, -480, 876, -1116, 452, -332, -352, 892, -1088, 1220, -676, 12, -292, 244, 496, 372, |
58 | | -32, 280, 200, 112, -440, -96, 24, -644, -184, 56, -432, 224, -980, 272, -260, 144, |
59 | | -436, 420, 356, 364, -528, 76, 172, -744, -368, 404, -752, -416, 684, -688, 72, 540, |
60 | | 416, 92, 444, 480, -72, -1416, 164, -1172, -68, 24, 424, 264, 1040, 128, -912, -524, |
61 | | -356, 64, 876, -12, 4, -88, 532, 272, -524, 320, 276, -508, 940, 24, -400, -120, |
62 | | 756, 60, 236, -412, 100, 376, -484, 400, -100, -740, -108, -260, 328, -268, 224, -200, |
63 | | -416, 184, -604, -564, -20, 296, 60, 892, -888, 60, 164, 68, -760, 216, -296, 904, |
64 | | -336, -28, 404, -356, -568, -208, -1480, -512, 296, 328, -360, -164, -1560, -776, 1156, -428, |
65 | | 164, -504, -112, 120, -216, -148, -264, 308, 32, 64, -72, 72, 116, 176, -64, -272, |
66 | | 460, -536, -784, -280, 348, 108, -752, -132, 524, -540, -776, 116, -296, -1196, -288, -560, |
67 | | 1040, -472, 116, -848, -1116, 116, 636, 696, 284, -176, 1016, 204, -864, -648, -248, 356, |
68 | | 972, -584, -204, 264, 880, 528, -24, -184, 116, 448, -144, 828, 524, 212, -212, 52, |
69 | | 12, 200, 268, -488, -404, -880, 824, -672, -40, 908, -248, 500, 716, -576, 492, -576, |
70 | | 16, 720, -108, 384, 124, 344, 280, 576, -500, 252, 104, -308, 196, -188, -8, 1268, |
71 | | 296, 1032, -1196, 436, 316, 372, -432, -200, -660, 704, -224, 596, -132, 268, 32, -452, |
72 | | 884, 104, -1008, 424, -1348, -280, 4, -1168, 368, 476, 696, 300, -8, 24, 180, -592, |
73 | | -196, 388, 304, 500, 724, -160, 244, -84, 272, -256, -420, 320, 208, -144, -156, 156, |
74 | | 364, 452, 28, 540, 316, 220, -644, -248, 464, 72, 360, 32, -388, 496, -680, -48, |
75 | | 208, -116, -408, 60, -604, -392, 548, -840, 784, -460, 656, -544, -388, -264, 908, -800, |
76 | | -628, -612, -568, 572, -220, 164, 288, -16, -308, 308, -112, -636, -760, 280, -668, 432, |
77 | | 364, 240, -196, 604, 340, 384, 196, 592, -44, -500, 432, -580, -132, 636, -76, 392, |
78 | | 4, -412, 540, 508, 328, -356, -36, 16, -220, -64, -248, -60, 24, -192, 368, 1040, |
79 | | 92, -24, -1044, -32, 40, 104, 148, 192, -136, -520, 56, -816, -224, 732, 392, 356, |
80 | | 212, -80, -424, -1008, -324, 588, -1496, 576, 460, -816, -848, 56, -580, -92, -1372, -112, |
81 | | -496, 200, 364, 52, -140, 48, -48, -60, 84, 72, 40, 132, -356, -268, -104, -284, |
82 | | -404, 732, -520, 164, -304, -540, 120, 328, -76, -460, 756, 388, 588, 236, -436, -72, |
83 | | -176, -404, -316, -148, 716, -604, 404, -72, -88, -888, -68, 944, 88, -220, -344, 960, |
84 | | 472, 460, -232, 704, 120, 832, -228, 692, -508, 132, -476, 844, -748, -364, -44, 1116, |
85 | | -1104, -1056, 76, 428, 552, -692, 60, 356, 96, -384, -188, -612, -576, 736, 508, 892, |
86 | | 352, -1132, 504, -24, -352, 324, 332, -600, -312, 292, 508, -144, -8, 484, 48, 284, |
87 | | -260, -240, 256, -100, -292, -204, -44, 472, -204, 908, -188, -1000, -256, 92, 1164, -392, |
88 | | 564, 356, 652, -28, -884, 256, 484, -192, 760, -176, 376, -524, -452, -436, 860, -736, |
89 | | 212, 124, 504, -476, 468, 76, -472, 552, -692, -944, -620, 740, -240, 400, 132, 20, |
90 | | 192, -196, 264, -668, -1012, -60, 296, -316, -828, 76, -156, 284, -768, -448, -832, 148, |
91 | | 248, 652, 616, 1236, 288, -328, -400, -124, 588, 220, 520, -696, 1032, 768, -740, -92, |
92 | | -272, 296, 448, -464, 412, -200, 392, 440, -200, 264, -152, -260, 320, 1032, 216, 320, |
93 | | -8, -64, 156, -1016, 1084, 1172, 536, 484, -432, 132, 372, -52, -256, 84, 116, -352, |
94 | | 48, 116, 304, -384, 412, 924, -300, 528, 628, 180, 648, 44, -980, -220, 1320, 48, |
95 | | 332, 748, 524, -268, -720, 540, -276, 564, -344, -208, -196, 436, 896, 88, -392, 132, |
96 | | 80, -964, -288, 568, 56, -48, -456, 888, 8, 552, -156, -292, 948, 288, 128, -716, |
97 | | -292, 1192, -152, 876, 352, -600, -260, -812, -468, -28, -120, -32, -44, 1284, 496, 192, |
98 | | 464, 312, -76, -516, -380, -456, -1012, -48, 308, -156, 36, 492, -156, -808, 188, 1652, |
99 | | 68, -120, -116, 316, 160, -140, 352, 808, -416, 592, 316, -480, 56, 528, -204, -568, |
100 | | 372, -232, 752, -344, 744, -4, 324, -416, -600, 768, 268, -248, -88, -132, -420, -432, |
101 | | 80, -288, 404, -316, -1216, -588, 520, -108, 92, -320, 368, -480, -216, -92, 1688, -300, |
102 | | 180, 1020, -176, 820, -68, -228, -260, 436, -904, 20, 40, -508, 440, -736, 312, 332, |
103 | | 204, 760, -372, 728, 96, -20, -632, -520, -560, 336, 1076, -64, -532, 776, 584, 192, |
104 | | 396, -728, -520, 276, -188, 80, -52, -612, -252, -48, 648, 212, -688, 228, -52, -260, |
105 | | 428, -412, -272, -404, 180, 816, -796, 48, 152, 484, -88, -216, 988, 696, 188, -528, |
106 | | 648, -116, -180, 316, 476, 12, -564, 96, 476, -252, -364, -376, -392, 556, -256, -576, |
107 | | 260, -352, 120, -16, -136, -260, -492, 72, 556, 660, 580, 616, 772, 436, 424, -32, |
108 | | -324, -1268, 416, -324, -80, 920, 160, 228, 724, 32, -516, 64, 384, 68, -128, 136, |
109 | | 240, 248, -204, -68, 252, -932, -120, -480, -628, -84, 192, 852, -404, -288, -132, 204, |
110 | | 100, 168, -68, -196, -868, 460, 1080, 380, -80, 244, 0, 484, -888, 64, 184, 352, |
111 | | 600, 460, 164, 604, -196, 320, -64, 588, -184, 228, 12, 372, 48, -848, -344, 224, |
112 | | 208, -200, 484, 128, -20, 272, -468, -840, 384, 256, -720, -520, -464, -580, 112, -120, |
113 | | 644, -356, -208, -608, -528, 704, 560, -424, 392, 828, 40, 84, 200, -152, 0, -144, |
114 | | 584, 280, -120, 80, -556, -972, -196, -472, 724, 80, 168, -32, 88, 160, -688, 0, |
115 | | 160, 356, 372, -776, 740, -128, 676, -248, -480, 4, -364, 96, 544, 232, -1032, 956, |
116 | | 236, 356, 20, -40, 300, 24, -676, -596, 132, 1120, -104, 532, -1096, 568, 648, 444, |
117 | | 508, 380, 188, -376, -604, 1488, 424, 24, 756, -220, -192, 716, 120, 920, 688, 168, |
118 | | 44, -460, 568, 284, 1144, 1160, 600, 424, 888, 656, -356, -320, 220, 316, -176, -724, |
119 | | -188, -816, -628, -348, -228, -380, 1012, -452, -660, 736, 928, 404, -696, -72, -268, -892, |
120 | | 128, 184, -344, -780, 360, 336, 400, 344, 428, 548, -112, 136, -228, -216, -820, -516, |
121 | | 340, 92, -136, 116, -300, 376, -244, 100, -316, -520, -284, -12, 824, 164, -548, -180, |
122 | | -128, 116, -924, -828, 268, -368, -580, 620, 192, 160, 0, -1676, 1068, 424, -56, -360, |
123 | | 468, -156, 720, 288, -528, 556, -364, 548, -148, 504, 316, 152, -648, -620, -684, -24, |
124 | | -376, -384, -108, -920, -1032, 768, 180, -264, -508, -1268, -260, -60, 300, -240, 988, 724, |
125 | | -376, -576, -212, -736, 556, 192, 1092, -620, -880, 376, -56, -4, -216, -32, 836, 268, |
126 | | 396, 1332, 864, -600, 100, 56, -412, -92, 356, 180, 884, -468, -436, 292, -388, -804, |
127 | | -704, -840, 368, -348, 140, -724, 1536, 940, 372, 112, -372, 436, -480, 1136, 296, -32, |
128 | | -228, 132, -48, -220, 868, -1016, -60, -1044, -464, 328, 916, 244, 12, -736, -296, 360, |
129 | | 468, -376, -108, -92, 788, 368, -56, 544, 400, -672, -420, 728, 16, 320, 44, -284, |
130 | | -380, -796, 488, 132, 204, -596, -372, 88, -152, -908, -636, -572, -624, -116, -692, -200, |
131 | | -56, 276, -88, 484, -324, 948, 864, 1000, -456, -184, -276, 292, -296, 156, 676, 320, |
132 | | 160, 908, -84, -1236, -288, -116, 260, -372, -644, 732, -756, -96, 84, 344, -520, 348, |
133 | | -688, 240, -84, 216, -1044, -136, -676, -396, -1500, 960, -40, 176, 168, 1516, 420, -504, |
134 | | -344, -364, -360, 1216, -940, -380, -212, 252, -660, -708, 484, -444, -152, 928, -120, 1112, |
135 | | 476, -260, 560, -148, -344, 108, -196, 228, -288, 504, 560, -328, -88, 288, -1008, 460, |
136 | | -228, 468, -836, -196, 76, 388, 232, 412, -1168, -716, -644, 756, -172, -356, -504, 116, |
137 | | 432, 528, 48, 476, -168, -608, 448, 160, -532, -272, 28, -676, -12, 828, 980, 456, |
138 | | 520, 104, -104, 256, -344, -4, -28, -368, -52, -524, -572, -556, -200, 768, 1124, -208, |
139 | | -512, 176, 232, 248, -148, -888, 604, -600, -304, 804, -156, -212, 488, -192, -804, -256, |
140 | | 368, -360, -916, -328, 228, -240, -448, -472, 856, -556, -364, 572, -12, -156, -368, -340, |
141 | | 432, 252, -752, -152, 288, 268, -580, -848, -592, 108, -76, 244, 312, -716, 592, -80, |
142 | | 436, 360, 4, -248, 160, 516, 584, 732, 44, -468, -280, -292, -156, -588, 28, 308, |
143 | | 912, 24, 124, 156, 180, -252, 944, -924, -772, -520, -428, -624, 300, -212, -1144, 32, |
144 | | -724, 800, -1128, -212, -1288, -848, 180, -416, 440, 192, -576, -792, -76, -1080, 80, -532, |
145 | | -352, -132, 380, -820, 148, 1112, 128, 164, 456, 700, -924, 144, -668, -384, 648, -832, |
146 | | 508, 552, -52, -100, -656, 208, -568, 748, -88, 680, 232, 300, 192, -408, -1012, -152, |
147 | | -252, -268, 272, -876, -664, -648, -332, -136, 16, 12, 1152, -28, 332, -536, 320, -672, |
148 | | -460, -316, 532, -260, 228, -40, 1052, -816, 180, 88, -496, -556, -672, -368, 428, 92, |
149 | | 356, 404, -408, 252, 196, -176, -556, 792, 268, 32, 372, 40, 96, -332, 328, 120, |
150 | | 372, -900, -40, 472, -264, -592, 952, 128, 656, 112, 664, -232, 420, 4, -344, -464, |
151 | | 556, 244, -416, -32, 252, 0, -412, 188, -696, 508, -476, 324, -1096, 656, -312, 560, |
152 | | 264, -136, 304, 160, -64, -580, 248, 336, -720, 560, -348, -288, -276, -196, -500, 852, |
153 | | -544, -236, -1128, -992, -776, 116, 56, 52, 860, 884, 212, -12, 168, 1020, 512, -552, |
154 | | 924, -148, 716, 188, 164, -340, -520, -184, 880, -152, -680, -208, -1156, -300, -528, -472, |
155 | | 364, 100, -744, -1056, -32, 540, 280, 144, -676, -32, -232, -280, -224, 96, 568, -76, |
156 | | 172, 148, 148, 104, 32, -296, -32, 788, -80, 32, -16, 280, 288, 944, 428, -484}; |
157 | | |
// Width, in bits, of the random index used to pick an entry from
// gaussian_sequence (2^11 == 2048 entries).
static const int32_t gauss_bits = 11;

// Dimensions of one luma sub-block.
static int32_t luma_subblock_size_y = 32;
static int32_t luma_subblock_size_x = 32;

// Dimensions of one chroma sub-block.
static int32_t chroma_subblock_size_y = 16;
static int32_t chroma_subblock_size_x = 16;

// Output clipping bounds applied to luma when params->clip_to_restricted_range is set.
static const int32_t min_luma_legal_range = 16;
static const int32_t max_luma_legal_range = 235;

// Output clipping bounds applied to chroma when params->clip_to_restricted_range is set.
static const int32_t min_chroma_legal_range = 16;
static const int32_t max_chroma_legal_range = 240;

// Per-plane scaling look-up tables, one entry per 8-bit intensity level.
// They are cleared by init_arrays() and read through scale_lut().
static int32_t scaling_lut_y[256];
static int32_t scaling_lut_cb[256];
static int32_t scaling_lut_cr[256];

// Grain value range; grain_min/grain_max bound the auto-regressive filter output.
// These are assigned outside the declarations shown here.
static int32_t grain_center;
static int32_t grain_min;
static int32_t grain_max;

// State of the 16-bit shift register behind get_random_number().
static uint16_t random_register = 0;
181 | | |
182 | | static void init_arrays(const AomFilmGrain* params, int32_t luma_stride, int32_t chroma_stride, |
183 | | int32_t*** pred_pos_luma_p, int32_t*** pred_pos_chroma_p, int32_t** luma_grain_block, |
184 | | int32_t** cb_grain_block, int32_t** cr_grain_block, int32_t** y_line_buf, int32_t** cb_line_buf, |
185 | | int32_t** cr_line_buf, int32_t** y_col_buf, int32_t** cb_col_buf, int32_t** cr_col_buf, |
186 | | int32_t luma_grain_samples, int32_t chroma_grain_samples, int32_t chroma_subsamp_y, |
187 | 0 | int32_t chroma_subsamp_x) { |
188 | 0 | memset(scaling_lut_y, 0, sizeof(*scaling_lut_y) * 256); |
189 | 0 | memset(scaling_lut_cb, 0, sizeof(*scaling_lut_cb) * 256); |
190 | 0 | memset(scaling_lut_cr, 0, sizeof(*scaling_lut_cr) * 256); |
191 | |
|
192 | 0 | int32_t num_pos_luma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1); |
193 | 0 | int32_t num_pos_chroma = num_pos_luma; |
194 | 0 | if (params->num_y_points > 0) { |
195 | 0 | ++num_pos_chroma; |
196 | 0 | } |
197 | |
|
198 | 0 | int32_t** pred_pos_luma; |
199 | 0 | int32_t** pred_pos_chroma; |
200 | |
|
201 | 0 | pred_pos_luma = (int32_t**)malloc(sizeof(*pred_pos_luma) * num_pos_luma); |
202 | 0 | ASSERT(pred_pos_luma != NULL); |
203 | 0 | for (int32_t row = 0; row < num_pos_luma; row++) { |
204 | 0 | pred_pos_luma[row] = (int32_t*)malloc(sizeof(**pred_pos_luma) * 3); |
205 | 0 | ASSERT(pred_pos_luma[row]); |
206 | 0 | } |
207 | |
|
208 | 0 | pred_pos_chroma = (int32_t**)malloc(sizeof(*pred_pos_chroma) * num_pos_chroma); |
209 | 0 | ASSERT(pred_pos_chroma != NULL); |
210 | 0 | for (int32_t row = 0; row < num_pos_chroma; row++) { |
211 | 0 | pred_pos_chroma[row] = (int32_t*)malloc(sizeof(**pred_pos_chroma) * 3); |
212 | 0 | ASSERT(pred_pos_chroma[row]); |
213 | 0 | } |
214 | |
|
215 | 0 | int32_t pos_ar_index = 0; |
216 | |
|
217 | 0 | for (int32_t row = -params->ar_coeff_lag; row < 0; row++) { |
218 | 0 | for (int32_t col = -params->ar_coeff_lag; col < params->ar_coeff_lag + 1; col++) { |
219 | 0 | pred_pos_luma[pos_ar_index][0] = row; |
220 | 0 | pred_pos_luma[pos_ar_index][1] = col; |
221 | 0 | pred_pos_luma[pos_ar_index][2] = 0; |
222 | |
|
223 | 0 | pred_pos_chroma[pos_ar_index][0] = row; |
224 | 0 | pred_pos_chroma[pos_ar_index][1] = col; |
225 | 0 | pred_pos_chroma[pos_ar_index][2] = 0; |
226 | 0 | ++pos_ar_index; |
227 | 0 | } |
228 | 0 | } |
229 | |
|
230 | 0 | for (int32_t col = -params->ar_coeff_lag; col < 0; col++) { |
231 | 0 | pred_pos_luma[pos_ar_index][0] = 0; |
232 | 0 | pred_pos_luma[pos_ar_index][1] = col; |
233 | 0 | pred_pos_luma[pos_ar_index][2] = 0; |
234 | |
|
235 | 0 | pred_pos_chroma[pos_ar_index][0] = 0; |
236 | 0 | pred_pos_chroma[pos_ar_index][1] = col; |
237 | 0 | pred_pos_chroma[pos_ar_index][2] = 0; |
238 | |
|
239 | 0 | ++pos_ar_index; |
240 | 0 | } |
241 | |
|
242 | 0 | if (params->num_y_points > 0) { |
243 | 0 | pred_pos_chroma[pos_ar_index][0] = 0; |
244 | 0 | pred_pos_chroma[pos_ar_index][1] = 0; |
245 | 0 | pred_pos_chroma[pos_ar_index][2] = 1; |
246 | 0 | } |
247 | |
|
248 | 0 | *pred_pos_luma_p = pred_pos_luma; |
249 | 0 | *pred_pos_chroma_p = pred_pos_chroma; |
250 | |
|
251 | 0 | *y_line_buf = (int32_t*)malloc(sizeof(**y_line_buf) * luma_stride * 2); |
252 | 0 | *cb_line_buf = (int32_t*)malloc(sizeof(**cb_line_buf) * chroma_stride * (2 >> chroma_subsamp_y)); |
253 | 0 | *cr_line_buf = (int32_t*)malloc(sizeof(**cr_line_buf) * chroma_stride * (2 >> chroma_subsamp_y)); |
254 | |
|
255 | 0 | *y_col_buf = (int32_t*)malloc(sizeof(**y_col_buf) * (luma_subblock_size_y + 2) * 2); |
256 | 0 | *cb_col_buf = (int32_t*)malloc(sizeof(**cb_col_buf) * (chroma_subblock_size_y + (2 >> chroma_subsamp_y)) * |
257 | 0 | (2 >> chroma_subsamp_x)); |
258 | 0 | *cr_col_buf = (int32_t*)malloc(sizeof(**cr_col_buf) * (chroma_subblock_size_y + (2 >> chroma_subsamp_y)) * |
259 | 0 | (2 >> chroma_subsamp_x)); |
260 | |
|
261 | 0 | *luma_grain_block = (int32_t*)malloc(sizeof(**luma_grain_block) * luma_grain_samples); |
262 | 0 | *cb_grain_block = (int32_t*)malloc(sizeof(**cb_grain_block) * chroma_grain_samples); |
263 | 0 | *cr_grain_block = (int32_t*)malloc(sizeof(**cr_grain_block) * chroma_grain_samples); |
264 | 0 | } |
265 | | |
266 | | static void dealloc_arrays(const AomFilmGrain* params, int32_t*** pred_pos_luma, int32_t*** pred_pos_chroma, |
267 | | int32_t** luma_grain_block, int32_t** cb_grain_block, int32_t** cr_grain_block, |
268 | | int32_t** y_line_buf, int32_t** cb_line_buf, int32_t** cr_line_buf, int32_t** y_col_buf, |
269 | 0 | int32_t** cb_col_buf, int32_t** cr_col_buf) { |
270 | 0 | int32_t num_pos_luma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1); |
271 | 0 | int32_t num_pos_chroma = num_pos_luma; |
272 | 0 | if (params->num_y_points > 0) { |
273 | 0 | ++num_pos_chroma; |
274 | 0 | } |
275 | |
|
276 | 0 | for (int32_t row = 0; row < num_pos_luma; row++) { |
277 | 0 | free((*pred_pos_luma)[row]); |
278 | 0 | } |
279 | 0 | free(*pred_pos_luma); |
280 | |
|
281 | 0 | for (int32_t row = 0; row < num_pos_chroma; row++) { |
282 | 0 | free((*pred_pos_chroma)[row]); |
283 | 0 | } |
284 | 0 | free((*pred_pos_chroma)); |
285 | |
|
286 | 0 | free(*y_line_buf); |
287 | |
|
288 | 0 | free(*cb_line_buf); |
289 | |
|
290 | 0 | free(*cr_line_buf); |
291 | |
|
292 | 0 | free(*y_col_buf); |
293 | |
|
294 | 0 | free(*cb_col_buf); |
295 | |
|
296 | 0 | free(*cr_col_buf); |
297 | |
|
298 | 0 | free(*luma_grain_block); |
299 | |
|
300 | 0 | free(*cb_grain_block); |
301 | |
|
302 | 0 | free(*cr_grain_block); |
303 | 0 | } |
304 | | |
305 | | // get a number between 0 and 2^bits - 1 |
306 | 0 | static INLINE int32_t get_random_number(int32_t bits) { |
307 | 0 | uint16_t bit; |
308 | 0 | bit = ((random_register >> 0) ^ (random_register >> 1) ^ (random_register >> 3) ^ (random_register >> 12)) & 1; |
309 | 0 | random_register = (random_register >> 1) | (bit << 15); |
310 | 0 | return (random_register >> (16 - bits)) & ((1 << bits) - 1); |
311 | 0 | } |
312 | | |
313 | 0 | static void init_random_generator(int32_t luma_line, uint16_t seed) { |
314 | | // same for the picture |
315 | |
|
316 | 0 | uint16_t msb = (seed >> 8) & 255; |
317 | 0 | uint16_t lsb = seed & 255; |
318 | |
|
319 | 0 | random_register = (msb << 8) + lsb; |
320 | | |
321 | | // changes for each row |
322 | 0 | int32_t luma_num = luma_line >> 5; |
323 | |
|
324 | 0 | random_register ^= ((luma_num * 37 + 178) & 255) << 8; |
325 | 0 | random_register ^= ((luma_num * 173 + 105) & 255); |
326 | 0 | } |
327 | | |
328 | | static void generate_luma_grain_block(const AomFilmGrain* params, int32_t** pred_pos_luma, int32_t* luma_grain_block, |
329 | | int32_t luma_block_size_y, int32_t luma_block_size_x, int32_t luma_grain_stride, |
330 | 0 | int32_t left_pad, int32_t top_pad, int32_t right_pad, int32_t bottom_pad) { |
331 | 0 | if (params->num_y_points == 0) { |
332 | 0 | return; |
333 | 0 | } |
334 | | |
335 | 0 | int32_t bit_depth = params->bit_depth; |
336 | 0 | int32_t gauss_sec_shift = 12 - bit_depth + params->grain_scale_shift; |
337 | |
|
338 | 0 | int32_t num_pos_luma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1); |
339 | 0 | int32_t rounding_offset = (1 << (params->ar_coeff_shift - 1)); |
340 | |
|
341 | 0 | for (int32_t i = 0; i < luma_block_size_y; i++) { |
342 | 0 | for (int32_t j = 0; j < luma_block_size_x; j++) { |
343 | 0 | luma_grain_block[i * luma_grain_stride + j] = (gaussian_sequence[get_random_number(gauss_bits)] + |
344 | 0 | ((1 << gauss_sec_shift) >> 1)) >> |
345 | 0 | gauss_sec_shift; |
346 | 0 | } |
347 | 0 | } |
348 | |
|
349 | 0 | for (int32_t i = top_pad; i < luma_block_size_y - bottom_pad; i++) { |
350 | 0 | for (int32_t j = left_pad; j < luma_block_size_x - right_pad; j++) { |
351 | 0 | int32_t wsum = 0; |
352 | 0 | for (int32_t pos = 0; pos < num_pos_luma; pos++) { |
353 | 0 | wsum = wsum + |
354 | 0 | params->ar_coeffs_y[pos] * |
355 | 0 | luma_grain_block[(i + pred_pos_luma[pos][0]) * luma_grain_stride + j + pred_pos_luma[pos][1]]; |
356 | 0 | } |
357 | 0 | luma_grain_block[i * luma_grain_stride + j] = clamp( |
358 | 0 | luma_grain_block[i * luma_grain_stride + j] + ((wsum + rounding_offset) >> params->ar_coeff_shift), |
359 | 0 | grain_min, |
360 | 0 | grain_max); |
361 | 0 | } |
362 | 0 | } |
363 | 0 | } |
364 | | |
365 | | static void generate_chroma_grain_blocks(const AomFilmGrain* params, |
366 | | // int32_t** pred_pos_luma, |
367 | | int32_t** pred_pos_chroma, int32_t* luma_grain_block, int32_t* cb_grain_block, |
368 | | int32_t* cr_grain_block, int32_t luma_grain_stride, |
369 | | int32_t chroma_block_size_y, int32_t chroma_block_size_x, |
370 | | int32_t chroma_grain_stride, int32_t left_pad, int32_t top_pad, |
371 | | int32_t right_pad, int32_t bottom_pad, int32_t chroma_subsamp_y, |
372 | 0 | int32_t chroma_subsamp_x) { |
373 | 0 | int32_t bit_depth = params->bit_depth; |
374 | 0 | int32_t gauss_sec_shift = 12 - bit_depth + params->grain_scale_shift; |
375 | |
|
376 | 0 | int32_t num_pos_chroma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1); |
377 | 0 | if (params->num_y_points > 0) { |
378 | 0 | ++num_pos_chroma; |
379 | 0 | } |
380 | 0 | int32_t rounding_offset = (1 << (params->ar_coeff_shift - 1)); |
381 | |
|
382 | 0 | int chroma_grain_block_size = chroma_block_size_y * chroma_grain_stride; |
383 | |
|
384 | 0 | if (params->num_cb_points || params->chroma_scaling_from_luma) { |
385 | 0 | init_random_generator(7 << 5, params->random_seed); |
386 | |
|
387 | 0 | for (int32_t i = 0; i < chroma_block_size_y; i++) { |
388 | 0 | for (int32_t j = 0; j < chroma_block_size_x; j++) { |
389 | 0 | cb_grain_block[i * chroma_grain_stride + j] = (gaussian_sequence[get_random_number(gauss_bits)] + |
390 | 0 | ((1 << gauss_sec_shift) >> 1)) >> |
391 | 0 | gauss_sec_shift; |
392 | 0 | } |
393 | 0 | } |
394 | 0 | } else { |
395 | 0 | memset(cb_grain_block, 0, sizeof(*cb_grain_block) * chroma_grain_block_size); |
396 | 0 | } |
397 | 0 | if (params->num_cr_points || params->chroma_scaling_from_luma) { |
398 | 0 | init_random_generator(11 << 5, params->random_seed); |
399 | |
|
400 | 0 | for (int32_t i = 0; i < chroma_block_size_y; i++) { |
401 | 0 | for (int32_t j = 0; j < chroma_block_size_x; j++) { |
402 | 0 | cr_grain_block[i * chroma_grain_stride + j] = (gaussian_sequence[get_random_number(gauss_bits)] + |
403 | 0 | ((1 << gauss_sec_shift) >> 1)) >> |
404 | 0 | gauss_sec_shift; |
405 | 0 | } |
406 | 0 | } |
407 | 0 | } else { |
408 | 0 | memset(cr_grain_block, 0, sizeof(*cr_grain_block) * chroma_grain_block_size); |
409 | 0 | } |
410 | |
|
411 | 0 | for (int32_t i = top_pad; i < chroma_block_size_y - bottom_pad; i++) { |
412 | 0 | for (int32_t j = left_pad; j < chroma_block_size_x - right_pad; j++) { |
413 | 0 | int32_t wsum_cb = 0; |
414 | 0 | int32_t wsum_cr = 0; |
415 | 0 | for (int32_t pos = 0; pos < num_pos_chroma; pos++) { |
416 | 0 | if (pred_pos_chroma[pos][2] == 0) { |
417 | 0 | wsum_cb = wsum_cb + |
418 | 0 | params->ar_coeffs_cb[pos] * |
419 | 0 | cb_grain_block[(i + pred_pos_chroma[pos][0]) * chroma_grain_stride + j + |
420 | 0 | pred_pos_chroma[pos][1]]; |
421 | 0 | wsum_cr = wsum_cr + |
422 | 0 | params->ar_coeffs_cr[pos] * |
423 | 0 | cr_grain_block[(i + pred_pos_chroma[pos][0]) * chroma_grain_stride + j + |
424 | 0 | pred_pos_chroma[pos][1]]; |
425 | 0 | } else if (pred_pos_chroma[pos][2] == 1) { |
426 | 0 | int32_t av_luma = 0; |
427 | 0 | int32_t luma_coord_y = ((i - top_pad) << chroma_subsamp_y) + top_pad; |
428 | 0 | int32_t luma_coord_x = ((j - left_pad) << chroma_subsamp_x) + left_pad; |
429 | |
|
430 | 0 | for (int32_t k = luma_coord_y; k < luma_coord_y + chroma_subsamp_y + 1; k++) { |
431 | 0 | for (int32_t l = luma_coord_x; l < luma_coord_x + chroma_subsamp_x + 1; l++) { |
432 | 0 | av_luma += luma_grain_block[k * luma_grain_stride + l]; |
433 | 0 | } |
434 | 0 | } |
435 | |
|
436 | 0 | av_luma = (av_luma + ((1 << (chroma_subsamp_y + chroma_subsamp_x)) >> 1)) >> |
437 | 0 | (chroma_subsamp_y + chroma_subsamp_x); |
438 | |
|
439 | 0 | wsum_cb = wsum_cb + params->ar_coeffs_cb[pos] * av_luma; |
440 | 0 | wsum_cr = wsum_cr + params->ar_coeffs_cr[pos] * av_luma; |
441 | 0 | } else { |
442 | 0 | SVT_ERROR( |
443 | 0 | "Grain synthesis: prediction between two chroma components is " |
444 | 0 | "not supported!"); |
445 | 0 | exit(1); |
446 | 0 | } |
447 | 0 | } |
448 | 0 | if (params->num_cb_points || params->chroma_scaling_from_luma) { |
449 | 0 | cb_grain_block[i * chroma_grain_stride + j] = clamp( |
450 | 0 | cb_grain_block[i * chroma_grain_stride + j] + |
451 | 0 | ((wsum_cb + rounding_offset) >> params->ar_coeff_shift), |
452 | 0 | grain_min, |
453 | 0 | grain_max); |
454 | 0 | } |
455 | 0 | if (params->num_cr_points || params->chroma_scaling_from_luma) { |
456 | 0 | cr_grain_block[i * chroma_grain_stride + j] = clamp( |
457 | 0 | cr_grain_block[i * chroma_grain_stride + j] + |
458 | 0 | ((wsum_cr + rounding_offset) >> params->ar_coeff_shift), |
459 | 0 | grain_min, |
460 | 0 | grain_max); |
461 | 0 | } |
462 | 0 | } |
463 | 0 | } |
464 | 0 | } |
465 | | |
/*
 * Expands a piecewise-linear scaling function into a 256-entry look-up table.
 *
 * scaling_points  array of {x, y} control points; x is used as an index into
 *                 scaling_lut, so it must lie within the table
 * num_points      number of control points; nothing is written when <= 0
 * scaling_lut     output table of 256 entries
 *
 * Entries before the first point take the first point's y, entries from the
 * last point onwards take the last point's y, and entries in between are
 * linearly interpolated in 16.16 fixed point.
 *
 * Segments whose width is not positive (two points sharing the same x, or
 * points out of order) are skipped. A zero-width segment used to cause an
 * integer division by zero.
 */
static void init_scaling_function(const int32_t scaling_points[][2], int32_t num_points, int32_t scaling_lut[]) {
    // A negative count would index scaling_points[num_points - 1] out of bounds below.
    if (num_points <= 0) {
        return;
    }

    for (int32_t i = 0; i < scaling_points[0][0]; i++) {
        scaling_lut[i] = scaling_points[0][1];
    }

    for (int32_t point = 0; point < num_points - 1; point++) {
        const int64_t delta_y = scaling_points[point + 1][1] - scaling_points[point][1];
        const int64_t delta_x = scaling_points[point + 1][0] - scaling_points[point][0];

        // Nothing to interpolate, and delta_x == 0 must not reach the division.
        if (delta_x <= 0) {
            continue;
        }

        // Slope in 16.16 fixed point, with the reciprocal of delta_x rounded.
        const int64_t delta = delta_y * ((65536 + (delta_x >> 1)) / delta_x);

        for (int32_t x = 0; x < delta_x; x++) {
            scaling_lut[scaling_points[point][0] + x] = scaling_points[point][1] + (int32_t)((x * delta + 32768) >> 16);
        }
    }

    for (int32_t i = scaling_points[num_points - 1][0]; i < 256; i++) {
        scaling_lut[i] = scaling_points[num_points - 1][1];
    }
}
490 | | |
// Looks up the scaling value for `index` in a 256-entry table.
// For 8-bit input the table entry is returned directly. For higher bit
// depths the low (bit_depth - 8) bits of `index` select a point between
// two neighbouring entries and the result is linearly interpolated with
// rounding. The last entry (255) is never interpolated.
static int32_t scale_lut(int32_t* scaling_lut, int32_t index, int32_t bit_depth) {
    const int32_t extra_bits = bit_depth - 8;
    const int32_t slot       = index >> extra_bits;

    if (extra_bits == 0 || slot == 255) {
        return scaling_lut[slot];
    }

    const int32_t fraction = index & ((1 << extra_bits) - 1);
    const int32_t step     = scaling_lut[slot + 1] - scaling_lut[slot];
    const int32_t half     = 1 << (extra_bits - 1);

    return scaling_lut[slot] + ((step * fraction + half) >> extra_bits);
}
505 | | |
506 | | static void add_noise_to_block(const AomFilmGrain* params, uint8_t* luma, uint8_t* cb, uint8_t* cr, int32_t luma_stride, |
507 | | int32_t chroma_stride, int32_t* luma_grain, int32_t* cb_grain, int32_t* cr_grain, |
508 | | int32_t luma_grain_stride, int32_t chroma_grain_stride, int32_t half_luma_height, |
509 | | int32_t half_luma_width, int32_t bit_depth, int32_t chroma_subsamp_y, |
510 | 0 | int32_t chroma_subsamp_x) { |
511 | 0 | int32_t cb_mult = params->cb_mult - 128; // fixed scale |
512 | 0 | int32_t cb_luma_mult = params->cb_luma_mult - 128; // fixed scale |
513 | 0 | int32_t cb_offset = params->cb_offset - 256; |
514 | |
|
515 | 0 | int32_t cr_mult = params->cr_mult - 128; // fixed scale |
516 | 0 | int32_t cr_luma_mult = params->cr_luma_mult - 128; // fixed scale |
517 | 0 | int32_t cr_offset = params->cr_offset - 256; |
518 | |
|
519 | 0 | int32_t rounding_offset = (1 << (params->scaling_shift - 1)); |
520 | |
|
521 | 0 | int32_t apply_y = params->num_y_points > 0 ? 1 : 0; |
522 | 0 | int32_t apply_cb = (params->num_cb_points > 0 || params->chroma_scaling_from_luma) ? 1 : 0; |
523 | 0 | int32_t apply_cr = (params->num_cr_points > 0 || params->chroma_scaling_from_luma) ? 1 : 0; |
524 | |
|
525 | 0 | if (params->chroma_scaling_from_luma) { |
526 | 0 | cb_mult = 0; // fixed scale |
527 | 0 | cb_luma_mult = 64; // fixed scale |
528 | 0 | cb_offset = 0; |
529 | |
|
530 | 0 | cr_mult = 0; // fixed scale |
531 | 0 | cr_luma_mult = 64; // fixed scale |
532 | 0 | cr_offset = 0; |
533 | 0 | } |
534 | |
|
535 | 0 | int32_t min_luma, max_luma, min_chroma, max_chroma; |
536 | |
|
537 | 0 | if (params->clip_to_restricted_range) { |
538 | 0 | min_luma = min_luma_legal_range; |
539 | 0 | max_luma = max_luma_legal_range; |
540 | |
|
541 | 0 | min_chroma = min_chroma_legal_range; |
542 | 0 | max_chroma = max_chroma_legal_range; |
543 | 0 | } else { |
544 | 0 | min_luma = min_chroma = 0; |
545 | 0 | max_luma = max_chroma = 255; |
546 | 0 | } |
547 | |
|
548 | 0 | for (int32_t i = 0; i < (half_luma_height << (1 - chroma_subsamp_y)); i++) { |
549 | 0 | for (int32_t j = 0; j < (half_luma_width << (1 - chroma_subsamp_x)); j++) { |
550 | 0 | int32_t average_luma = 0; |
551 | 0 | if (chroma_subsamp_x) { |
552 | 0 | average_luma = (luma[(i << chroma_subsamp_y) * luma_stride + (j << chroma_subsamp_x)] + |
553 | 0 | luma[(i << chroma_subsamp_y) * luma_stride + (j << chroma_subsamp_x) + 1] + 1) >> |
554 | 0 | 1; |
555 | 0 | } else { |
556 | 0 | average_luma = luma[(i << chroma_subsamp_y) * luma_stride + j]; |
557 | 0 | } |
558 | 0 | if (apply_cb) { |
559 | 0 | cb[i * chroma_stride + j] = clamp( |
560 | 0 | cb[i * chroma_stride + j] + |
561 | 0 | ((scale_lut(scaling_lut_cb, |
562 | 0 | clamp(((average_luma * cb_luma_mult + cb_mult * cb[i * chroma_stride + j]) >> 6) + |
563 | 0 | cb_offset, |
564 | 0 | 0, |
565 | 0 | (256 << (bit_depth - 8)) - 1), |
566 | 0 | 8) * |
567 | 0 | cb_grain[i * chroma_grain_stride + j] + |
568 | 0 | rounding_offset) >> |
569 | 0 | params->scaling_shift), |
570 | 0 | min_chroma, |
571 | 0 | max_chroma); |
572 | 0 | } |
573 | |
|
574 | 0 | if (apply_cr) { |
575 | 0 | cr[i * chroma_stride + j] = clamp( |
576 | 0 | cr[i * chroma_stride + j] + |
577 | 0 | ((scale_lut(scaling_lut_cr, |
578 | 0 | clamp(((average_luma * cr_luma_mult + cr_mult * cr[i * chroma_stride + j]) >> 6) + |
579 | 0 | cr_offset, |
580 | 0 | 0, |
581 | 0 | (256 << (bit_depth - 8)) - 1), |
582 | 0 | 8) * |
583 | 0 | cr_grain[i * chroma_grain_stride + j] + |
584 | 0 | rounding_offset) >> |
585 | 0 | params->scaling_shift), |
586 | 0 | min_chroma, |
587 | 0 | max_chroma); |
588 | 0 | } |
589 | 0 | } |
590 | 0 | } |
591 | |
|
592 | 0 | if (apply_y) { |
593 | 0 | for (int32_t i = 0; i < (half_luma_height << 1); i++) { |
594 | 0 | for (int32_t j = 0; j < (half_luma_width << 1); j++) { |
595 | 0 | luma[i * luma_stride + j] = clamp(luma[i * luma_stride + j] + |
596 | 0 | ((scale_lut(scaling_lut_y, luma[i * luma_stride + j], 8) * |
597 | 0 | luma_grain[i * luma_grain_stride + j] + |
598 | 0 | rounding_offset) >> |
599 | 0 | params->scaling_shift), |
600 | 0 | min_luma, |
601 | 0 | max_luma); |
602 | 0 | } |
603 | 0 | } |
604 | 0 | } |
605 | 0 | } |
606 | | |
607 | | static void add_noise_to_block_hbd(const AomFilmGrain* params, uint16_t* luma, uint16_t* cb, uint16_t* cr, |
608 | | int32_t luma_stride, int32_t chroma_stride, int32_t* luma_grain, int32_t* cb_grain, |
609 | | int32_t* cr_grain, int32_t luma_grain_stride, int32_t chroma_grain_stride, |
610 | | int32_t half_luma_height, int32_t half_luma_width, int32_t bit_depth, |
611 | 0 | int32_t chroma_subsamp_y, int32_t chroma_subsamp_x) { |
612 | 0 | int32_t cb_mult = params->cb_mult - 128; // fixed scale |
613 | 0 | int32_t cb_luma_mult = params->cb_luma_mult - 128; // fixed scale |
614 | | // offset value depends on the bit depth |
615 | 0 | int32_t cb_offset = (params->cb_offset << (bit_depth - 8)) - (1 << bit_depth); |
616 | |
|
617 | 0 | int32_t cr_mult = params->cr_mult - 128; // fixed scale |
618 | 0 | int32_t cr_luma_mult = params->cr_luma_mult - 128; // fixed scale |
619 | | // offset value depends on the bit depth |
620 | 0 | int32_t cr_offset = (params->cr_offset << (bit_depth - 8)) - (1 << bit_depth); |
621 | |
|
622 | 0 | int32_t rounding_offset = (1 << (params->scaling_shift - 1)); |
623 | |
|
624 | 0 | int32_t apply_y = params->num_y_points > 0 ? 1 : 0; |
625 | 0 | int32_t apply_cb = params->num_cb_points > 0 ? 1 : 0; |
626 | 0 | int32_t apply_cr = params->num_cr_points > 0 ? 1 : 0; |
627 | |
|
628 | 0 | if (params->chroma_scaling_from_luma) { |
629 | 0 | cb_mult = 0; // fixed scale |
630 | 0 | cb_luma_mult = 64; // fixed scale |
631 | 0 | cb_offset = 0; |
632 | |
|
633 | 0 | cr_mult = 0; // fixed scale |
634 | 0 | cr_luma_mult = 64; // fixed scale |
635 | 0 | cr_offset = 0; |
636 | 0 | } |
637 | |
|
638 | 0 | int32_t min_luma, max_luma, min_chroma, max_chroma; |
639 | |
|
640 | 0 | if (params->clip_to_restricted_range) { |
641 | 0 | min_luma = min_luma_legal_range << (bit_depth - 8); |
642 | 0 | max_luma = max_luma_legal_range << (bit_depth - 8); |
643 | |
|
644 | 0 | min_chroma = min_chroma_legal_range << (bit_depth - 8); |
645 | 0 | max_chroma = max_chroma_legal_range << (bit_depth - 8); |
646 | 0 | } else { |
647 | 0 | min_luma = min_chroma = 0; |
648 | 0 | max_luma = max_chroma = (256 << (bit_depth - 8)) - 1; |
649 | 0 | } |
650 | |
|
651 | 0 | for (int32_t i = 0; i < (half_luma_height << (1 - chroma_subsamp_y)); i++) { |
652 | 0 | for (int32_t j = 0; j < (half_luma_width << (1 - chroma_subsamp_x)); j++) { |
653 | 0 | int32_t average_luma = 0; |
654 | 0 | if (chroma_subsamp_x) { |
655 | 0 | average_luma = (luma[(i << chroma_subsamp_y) * luma_stride + (j << chroma_subsamp_x)] + |
656 | 0 | luma[(i << chroma_subsamp_y) * luma_stride + (j << chroma_subsamp_x) + 1] + 1) >> |
657 | 0 | 1; |
658 | 0 | } else { |
659 | 0 | average_luma = luma[(i << chroma_subsamp_y) * luma_stride + j]; |
660 | 0 | } |
661 | 0 | if (apply_cb) { |
662 | 0 | cb[i * chroma_stride + j] = clamp( |
663 | 0 | cb[i * chroma_stride + j] + |
664 | 0 | ((scale_lut(scaling_lut_cb, |
665 | 0 | clamp(((average_luma * cb_luma_mult + cb_mult * cb[i * chroma_stride + j]) >> 6) + |
666 | 0 | cb_offset, |
667 | 0 | 0, |
668 | 0 | (256 << (bit_depth - 8)) - 1), |
669 | 0 | bit_depth) * |
670 | 0 | cb_grain[i * chroma_grain_stride + j] + |
671 | 0 | rounding_offset) >> |
672 | 0 | params->scaling_shift), |
673 | 0 | min_chroma, |
674 | 0 | max_chroma); |
675 | 0 | } |
676 | 0 | if (apply_cr) { |
677 | 0 | cr[i * chroma_stride + j] = clamp( |
678 | 0 | cr[i * chroma_stride + j] + |
679 | 0 | ((scale_lut(scaling_lut_cr, |
680 | 0 | clamp(((average_luma * cr_luma_mult + cr_mult * cr[i * chroma_stride + j]) >> 6) + |
681 | 0 | cr_offset, |
682 | 0 | 0, |
683 | 0 | (256 << (bit_depth - 8)) - 1), |
684 | 0 | bit_depth) * |
685 | 0 | cr_grain[i * chroma_grain_stride + j] + |
686 | 0 | rounding_offset) >> |
687 | 0 | params->scaling_shift), |
688 | 0 | min_chroma, |
689 | 0 | max_chroma); |
690 | 0 | } |
691 | 0 | } |
692 | 0 | } |
693 | |
|
694 | 0 | if (apply_y) { |
695 | 0 | for (int32_t i = 0; i < (half_luma_height << 1); i++) { |
696 | 0 | for (int32_t j = 0; j < (half_luma_width << 1); j++) { |
697 | 0 | luma[i * luma_stride + j] = clamp(luma[i * luma_stride + j] + |
698 | 0 | ((scale_lut(scaling_lut_y, luma[i * luma_stride + j], bit_depth) * |
699 | 0 | luma_grain[i * luma_grain_stride + j] + |
700 | 0 | rounding_offset) >> |
701 | 0 | params->scaling_shift), |
702 | 0 | min_luma, |
703 | 0 | max_luma); |
704 | 0 | } |
705 | 0 | } |
706 | 0 | } |
707 | 0 | } |
708 | | |
709 | 0 | int32_t svt_aom_film_grain_params_equal(const AomFilmGrain* pars_a, const AomFilmGrain* pars_b) { |
710 | 0 | if (pars_a->apply_grain != pars_b->apply_grain) { |
711 | 0 | return 0; |
712 | 0 | } |
713 | 0 | if (pars_a->overlap_flag != pars_b->overlap_flag) { |
714 | 0 | return 0; |
715 | 0 | } |
716 | 0 | if (pars_a->clip_to_restricted_range != pars_b->clip_to_restricted_range) { |
717 | 0 | return 0; |
718 | 0 | } |
719 | 0 | if (pars_a->chroma_scaling_from_luma != pars_b->chroma_scaling_from_luma) { |
720 | 0 | return 0; |
721 | 0 | } |
722 | 0 | if (pars_a->grain_scale_shift != pars_b->grain_scale_shift) { |
723 | 0 | return 0; |
724 | 0 | } |
725 | 0 | if (pars_a->ar_coeff_shift != pars_b->ar_coeff_shift) { |
726 | 0 | return 0; |
727 | 0 | } |
728 | 0 | if (pars_a->cb_mult != pars_b->cb_mult) { |
729 | 0 | return 0; |
730 | 0 | } |
731 | 0 | if (pars_a->cb_luma_mult != pars_b->cb_luma_mult) { |
732 | 0 | return 0; |
733 | 0 | } |
734 | 0 | if (pars_a->cb_offset != pars_b->cb_offset) { |
735 | 0 | return 0; |
736 | 0 | } |
737 | 0 | if (pars_a->cr_mult != pars_b->cr_mult) { |
738 | 0 | return 0; |
739 | 0 | } |
740 | 0 | if (pars_a->cr_luma_mult != pars_b->cr_luma_mult) { |
741 | 0 | return 0; |
742 | 0 | } |
743 | 0 | if (pars_a->cr_offset != pars_b->cr_offset) { |
744 | 0 | return 0; |
745 | 0 | } |
746 | | |
747 | 0 | if (pars_a->scaling_shift != pars_b->scaling_shift) { |
748 | 0 | return 0; |
749 | 0 | } |
750 | 0 | if (pars_a->ar_coeff_lag != pars_b->ar_coeff_lag) { |
751 | 0 | return 0; |
752 | 0 | } |
753 | | |
754 | 0 | if (pars_a->num_y_points != pars_b->num_y_points) { |
755 | 0 | return 0; |
756 | 0 | } |
757 | | |
758 | 0 | if (pars_a->num_cb_points != pars_b->num_cb_points) { |
759 | 0 | return 0; |
760 | 0 | } |
761 | | |
762 | 0 | if (pars_a->num_cr_points != pars_b->num_cr_points) { |
763 | 0 | return 0; |
764 | 0 | } |
765 | | |
766 | 0 | if (memcmp(pars_a->scaling_points_y, pars_b->scaling_points_y, sizeof(pars_b->scaling_points_y))) { |
767 | 0 | return 0; |
768 | 0 | } |
769 | | |
770 | 0 | if (memcmp(pars_a->scaling_points_cb, pars_b->scaling_points_cb, sizeof(pars_b->scaling_points_cb))) { |
771 | 0 | return 0; |
772 | 0 | } |
773 | | |
774 | 0 | if (memcmp(pars_a->scaling_points_cr, pars_b->scaling_points_cr, sizeof(pars_b->scaling_points_cr))) { |
775 | 0 | return 0; |
776 | 0 | } |
777 | | |
778 | 0 | if (memcmp(pars_a->ar_coeffs_y, pars_b->ar_coeffs_y, sizeof(pars_b->ar_coeffs_y))) { |
779 | 0 | return 0; |
780 | 0 | } |
781 | | |
782 | 0 | if (memcmp(pars_a->ar_coeffs_cb, pars_b->ar_coeffs_cb, sizeof(pars_b->ar_coeffs_cb))) { |
783 | 0 | return 0; |
784 | 0 | } |
785 | | |
786 | 0 | if (memcmp(pars_a->ar_coeffs_cr, pars_b->ar_coeffs_cr, sizeof(pars_b->ar_coeffs_cr))) { |
787 | 0 | return 0; |
788 | 0 | } |
789 | | |
790 | 0 | return 1; |
791 | 0 | } |
792 | | |
/*
 * Copies a width x height rectangle of samples from src to dst, row by row.
 *
 * Both pointers are byte pointers. When use_high_bit_depth is non-zero each
 * sample occupies two bytes, so the row length and both strides (given in
 * samples) are doubled to obtain byte quantities.
 */
void svt_aom_fgn_copy_rect(uint8_t* src, int32_t src_stride, uint8_t* dst, int32_t dst_stride, int32_t width,
                           int32_t height, int32_t use_high_bit_depth) {
    const int32_t bytes_per_sample = use_high_bit_depth ? 2 : 1;
    const size_t  row_bytes        = width * sizeof(uint8_t) * bytes_per_sample;
    const int32_t src_step         = src_stride * bytes_per_sample;
    const int32_t dst_step         = dst_stride * bytes_per_sample;

    for (; height != 0; --height) {
        svt_memcpy(dst, src, row_bytes);
        src += src_step;
        dst += dst_step;
    }
}
804 | | |
805 | | static void copy_area(int32_t* src, int32_t src_stride, int32_t* dst, int32_t dst_stride, int32_t width, |
806 | 0 | int32_t height) { |
807 | 0 | while (height) { |
808 | 0 | if (svt_memcpy != NULL) { |
809 | 0 | svt_memcpy(dst, src, width * sizeof(*src)); |
810 | 0 | } else { |
811 | 0 | svt_memcpy_c(dst, src, width * sizeof(*src)); |
812 | 0 | } |
813 | 0 | src += src_stride; |
814 | 0 | dst += dst_stride; |
815 | 0 | --height; |
816 | 0 | } |
817 | 0 | return; |
818 | 0 | } |
819 | | |
820 | | static void ver_boundary_overlap(int32_t* left_block, int32_t left_stride, int32_t* right_block, int32_t right_stride, |
821 | 0 | int32_t* dst_block, int32_t dst_stride, int32_t width, int32_t height) { |
822 | 0 | if (width == 1) { |
823 | 0 | while (height) { |
824 | 0 | *dst_block = clamp((*left_block * 23 + *right_block * 22 + 16) >> 5, grain_min, grain_max); |
825 | 0 | left_block += left_stride; |
826 | 0 | right_block += right_stride; |
827 | 0 | dst_block += dst_stride; |
828 | 0 | --height; |
829 | 0 | } |
830 | 0 | return; |
831 | 0 | } else if (width == 2) { |
832 | 0 | while (height) { |
833 | 0 | dst_block[0] = clamp((27 * left_block[0] + 17 * right_block[0] + 16) >> 5, grain_min, grain_max); |
834 | 0 | dst_block[1] = clamp((17 * left_block[1] + 27 * right_block[1] + 16) >> 5, grain_min, grain_max); |
835 | 0 | left_block += left_stride; |
836 | 0 | right_block += right_stride; |
837 | 0 | dst_block += dst_stride; |
838 | 0 | --height; |
839 | 0 | } |
840 | 0 | return; |
841 | 0 | } |
842 | 0 | } |
843 | | |
844 | | static void hor_boundary_overlap(int32_t* top_block, int32_t top_stride, int32_t* bottom_block, int32_t bottom_stride, |
845 | 0 | int32_t* dst_block, int32_t dst_stride, int32_t width, int32_t height) { |
846 | 0 | if (height == 1) { |
847 | 0 | while (width) { |
848 | 0 | *dst_block = clamp((*top_block * 23 + *bottom_block * 22 + 16) >> 5, grain_min, grain_max); |
849 | 0 | ++top_block; |
850 | 0 | ++bottom_block; |
851 | 0 | ++dst_block; |
852 | 0 | --width; |
853 | 0 | } |
854 | 0 | return; |
855 | 0 | } else if (height == 2) { |
856 | 0 | while (width) { |
857 | 0 | dst_block[0] = clamp((27 * top_block[0] + 17 * bottom_block[0] + 16) >> 5, grain_min, grain_max); |
858 | 0 | dst_block[dst_stride] = clamp( |
859 | 0 | (17 * top_block[top_stride] + 27 * bottom_block[bottom_stride] + 16) >> 5, grain_min, grain_max); |
860 | 0 | ++top_block; |
861 | 0 | ++bottom_block; |
862 | 0 | ++dst_block; |
863 | 0 | --width; |
864 | 0 | } |
865 | 0 | return; |
866 | 0 | } |
867 | 0 | } |
868 | | |
869 | | void svt_av1_add_film_grain_run(const AomFilmGrain* params, uint8_t* luma, uint8_t* cb, uint8_t* cr, int32_t height, |
870 | | int32_t width, int32_t luma_stride, int32_t chroma_stride, int32_t use_high_bit_depth, |
871 | 0 | int32_t chroma_subsamp_y, int32_t chroma_subsamp_x) { |
872 | 0 | int32_t** pred_pos_luma; |
873 | 0 | int32_t** pred_pos_chroma; |
874 | 0 | int32_t* luma_grain_block; |
875 | 0 | int32_t* cb_grain_block; |
876 | 0 | int32_t* cr_grain_block; |
877 | |
|
878 | 0 | int32_t* y_line_buf; |
879 | 0 | int32_t* cb_line_buf; |
880 | 0 | int32_t* cr_line_buf; |
881 | |
|
882 | 0 | int32_t* y_col_buf; |
883 | 0 | int32_t* cb_col_buf; |
884 | 0 | int32_t* cr_col_buf; |
885 | |
|
886 | 0 | random_register = params->random_seed; |
887 | |
|
888 | 0 | int32_t left_pad = 3; |
889 | 0 | int32_t right_pad = 3; // padding to offset for AR coefficients |
890 | 0 | int32_t top_pad = 3; |
891 | 0 | int32_t bottom_pad = 0; |
892 | |
|
893 | 0 | int32_t ar_padding = 3; // maximum lag used for stabilization of AR coefficients |
894 | |
|
895 | 0 | luma_subblock_size_y = 32; |
896 | 0 | luma_subblock_size_x = 32; |
897 | |
|
898 | 0 | chroma_subblock_size_y = luma_subblock_size_y >> chroma_subsamp_y; |
899 | 0 | chroma_subblock_size_x = luma_subblock_size_x >> chroma_subsamp_x; |
900 | | |
901 | | // Initial padding is only needed for generation of |
902 | | // film grain templates (to stabilize the AR process) |
903 | | // Only a 64x64 luma and 32x32 chroma part of a template |
904 | | // is used later for adding grain, padding can be discarded |
905 | |
|
906 | 0 | int32_t luma_block_size_y = top_pad + 2 * ar_padding + luma_subblock_size_y * 2 + bottom_pad; |
907 | 0 | int32_t luma_block_size_x = left_pad + 2 * ar_padding + luma_subblock_size_x * 2 + 2 * ar_padding + right_pad; |
908 | |
|
909 | 0 | int32_t chroma_block_size_y = top_pad + (2 >> chroma_subsamp_y) * ar_padding + chroma_subblock_size_y * 2 + |
910 | 0 | bottom_pad; |
911 | 0 | int32_t chroma_block_size_x = left_pad + (2 >> chroma_subsamp_x) * ar_padding + chroma_subblock_size_x * 2 + |
912 | 0 | (2 >> chroma_subsamp_x) * ar_padding + right_pad; |
913 | |
|
914 | 0 | int32_t luma_grain_stride = luma_block_size_x; |
915 | 0 | int32_t chroma_grain_stride = chroma_block_size_x; |
916 | |
|
917 | 0 | int32_t overlap = params->overlap_flag; |
918 | 0 | int32_t bit_depth = params->bit_depth; |
919 | |
|
920 | 0 | grain_center = 128 << (bit_depth - 8); |
921 | 0 | grain_min = 0 - grain_center; |
922 | 0 | grain_max = (256 << (bit_depth - 8)) - 1 - grain_center; |
923 | |
|
924 | 0 | init_arrays(params, |
925 | 0 | luma_stride, |
926 | 0 | chroma_stride, |
927 | 0 | &pred_pos_luma, |
928 | 0 | &pred_pos_chroma, |
929 | 0 | &luma_grain_block, |
930 | 0 | &cb_grain_block, |
931 | 0 | &cr_grain_block, |
932 | 0 | &y_line_buf, |
933 | 0 | &cb_line_buf, |
934 | 0 | &cr_line_buf, |
935 | 0 | &y_col_buf, |
936 | 0 | &cb_col_buf, |
937 | 0 | &cr_col_buf, |
938 | 0 | luma_block_size_y * luma_block_size_x, |
939 | 0 | chroma_block_size_y * chroma_block_size_x, |
940 | 0 | chroma_subsamp_y, |
941 | 0 | chroma_subsamp_x); |
942 | |
|
943 | 0 | generate_luma_grain_block(params, |
944 | 0 | pred_pos_luma, |
945 | 0 | luma_grain_block, |
946 | 0 | luma_block_size_y, |
947 | 0 | luma_block_size_x, |
948 | 0 | luma_grain_stride, |
949 | 0 | left_pad, |
950 | 0 | top_pad, |
951 | 0 | right_pad, |
952 | 0 | bottom_pad); |
953 | |
|
954 | 0 | generate_chroma_grain_blocks(params, |
955 | | // pred_pos_luma, |
956 | 0 | pred_pos_chroma, |
957 | 0 | luma_grain_block, |
958 | 0 | cb_grain_block, |
959 | 0 | cr_grain_block, |
960 | 0 | luma_grain_stride, |
961 | 0 | chroma_block_size_y, |
962 | 0 | chroma_block_size_x, |
963 | 0 | chroma_grain_stride, |
964 | 0 | left_pad, |
965 | 0 | top_pad, |
966 | 0 | right_pad, |
967 | 0 | bottom_pad, |
968 | 0 | chroma_subsamp_y, |
969 | 0 | chroma_subsamp_x); |
970 | |
|
971 | 0 | init_scaling_function(params->scaling_points_y, params->num_y_points, scaling_lut_y); |
972 | |
|
973 | 0 | if (params->chroma_scaling_from_luma) { |
974 | 0 | svt_memcpy(scaling_lut_cb, scaling_lut_y, sizeof(*scaling_lut_y) * 256); |
975 | 0 | svt_memcpy(scaling_lut_cr, scaling_lut_y, sizeof(*scaling_lut_y) * 256); |
976 | 0 | } else { |
977 | 0 | init_scaling_function(params->scaling_points_cb, params->num_cb_points, scaling_lut_cb); |
978 | 0 | init_scaling_function(params->scaling_points_cr, params->num_cr_points, scaling_lut_cr); |
979 | 0 | } |
980 | 0 | for (int32_t y = 0; y < height / 2; y += (luma_subblock_size_y >> 1)) { |
981 | 0 | init_random_generator(y * 2, params->random_seed); |
982 | |
|
983 | 0 | for (int32_t x = 0; x < width / 2; x += (luma_subblock_size_x >> 1)) { |
984 | 0 | int32_t offset_y = get_random_number(8); |
985 | 0 | int32_t offset_x = (offset_y >> 4) & 15; |
986 | 0 | offset_y &= 15; |
987 | |
|
988 | 0 | int32_t luma_offset_y = left_pad + 2 * ar_padding + (offset_y << 1); |
989 | 0 | int32_t luma_offset_x = top_pad + 2 * ar_padding + (offset_x << 1); |
990 | |
|
991 | 0 | int32_t chroma_offset_y = top_pad + (2 >> chroma_subsamp_y) * ar_padding + |
992 | 0 | offset_y * (2 >> chroma_subsamp_y); |
993 | 0 | int32_t chroma_offset_x = left_pad + (2 >> chroma_subsamp_x) * ar_padding + |
994 | 0 | offset_x * (2 >> chroma_subsamp_x); |
995 | |
|
996 | 0 | if (overlap && x) { |
997 | 0 | ver_boundary_overlap(y_col_buf, |
998 | 0 | 2, |
999 | 0 | luma_grain_block + luma_offset_y * luma_grain_stride + luma_offset_x, |
1000 | 0 | luma_grain_stride, |
1001 | 0 | y_col_buf, |
1002 | 0 | 2, |
1003 | 0 | 2, |
1004 | 0 | AOMMIN(luma_subblock_size_y + 2, height - (y << 1))); |
1005 | |
|
1006 | 0 | ver_boundary_overlap( |
1007 | 0 | cb_col_buf, |
1008 | 0 | 2 >> chroma_subsamp_x, |
1009 | 0 | cb_grain_block + chroma_offset_y * chroma_grain_stride + chroma_offset_x, |
1010 | 0 | chroma_grain_stride, |
1011 | 0 | cb_col_buf, |
1012 | 0 | 2 >> chroma_subsamp_x, |
1013 | 0 | 2 >> chroma_subsamp_x, |
1014 | 0 | AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y), (height - (y << 1)) >> chroma_subsamp_y)); |
1015 | |
|
1016 | 0 | ver_boundary_overlap( |
1017 | 0 | cr_col_buf, |
1018 | 0 | 2 >> chroma_subsamp_x, |
1019 | 0 | cr_grain_block + chroma_offset_y * chroma_grain_stride + chroma_offset_x, |
1020 | 0 | chroma_grain_stride, |
1021 | 0 | cr_col_buf, |
1022 | 0 | 2 >> chroma_subsamp_x, |
1023 | 0 | 2 >> chroma_subsamp_x, |
1024 | 0 | AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y), (height - (y << 1)) >> chroma_subsamp_y)); |
1025 | |
|
1026 | 0 | int32_t i = y ? 1 : 0; |
1027 | |
|
1028 | 0 | if (use_high_bit_depth) { |
1029 | 0 | add_noise_to_block_hbd(params, |
1030 | 0 | (uint16_t*)luma + ((y + i) << 1) * luma_stride + (x << 1), |
1031 | 0 | (uint16_t*)cb + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride + |
1032 | 0 | (x << (1 - chroma_subsamp_x)), |
1033 | 0 | (uint16_t*)cr + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride + |
1034 | 0 | (x << (1 - chroma_subsamp_x)), |
1035 | 0 | luma_stride, |
1036 | 0 | chroma_stride, |
1037 | 0 | y_col_buf + i * 4, |
1038 | 0 | cb_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x), |
1039 | 0 | cr_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x), |
1040 | 0 | 2, |
1041 | 0 | (2 - chroma_subsamp_x), |
1042 | 0 | AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i, |
1043 | 0 | 1, |
1044 | 0 | bit_depth, |
1045 | 0 | chroma_subsamp_y, |
1046 | 0 | chroma_subsamp_x); |
1047 | 0 | } else { |
1048 | 0 | add_noise_to_block( |
1049 | 0 | params, |
1050 | 0 | luma + ((y + i) << 1) * luma_stride + (x << 1), |
1051 | 0 | cb + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride + (x << (1 - chroma_subsamp_x)), |
1052 | 0 | cr + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride + (x << (1 - chroma_subsamp_x)), |
1053 | 0 | luma_stride, |
1054 | 0 | chroma_stride, |
1055 | 0 | y_col_buf + i * 4, |
1056 | 0 | cb_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x), |
1057 | 0 | cr_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x), |
1058 | 0 | 2, |
1059 | 0 | (2 - chroma_subsamp_x), |
1060 | 0 | AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i, |
1061 | 0 | 1, |
1062 | 0 | bit_depth, |
1063 | 0 | chroma_subsamp_y, |
1064 | 0 | chroma_subsamp_x); |
1065 | 0 | } |
1066 | 0 | } |
1067 | |
|
1068 | 0 | if (overlap && y) { |
1069 | 0 | if (x) { |
1070 | 0 | ASSERT(y_col_buf != NULL); |
1071 | 0 | hor_boundary_overlap( |
1072 | 0 | y_line_buf + (x << 1), luma_stride, y_col_buf, 2, y_line_buf + (x << 1), luma_stride, 2, 2); |
1073 | |
|
1074 | 0 | hor_boundary_overlap(cb_line_buf + x * (2 >> chroma_subsamp_x), |
1075 | 0 | chroma_stride, |
1076 | 0 | cb_col_buf, |
1077 | 0 | 2 >> chroma_subsamp_x, |
1078 | 0 | cb_line_buf + x * (2 >> chroma_subsamp_x), |
1079 | 0 | chroma_stride, |
1080 | 0 | 2 >> chroma_subsamp_x, |
1081 | 0 | 2 >> chroma_subsamp_y); |
1082 | |
|
1083 | 0 | hor_boundary_overlap(cr_line_buf + x * (2 >> chroma_subsamp_x), |
1084 | 0 | chroma_stride, |
1085 | 0 | cr_col_buf, |
1086 | 0 | 2 >> chroma_subsamp_x, |
1087 | 0 | cr_line_buf + x * (2 >> chroma_subsamp_x), |
1088 | 0 | chroma_stride, |
1089 | 0 | 2 >> chroma_subsamp_x, |
1090 | 0 | 2 >> chroma_subsamp_y); |
1091 | 0 | } |
1092 | |
|
1093 | 0 | hor_boundary_overlap(y_line_buf + ((x ? x + 1 : 0) << 1), |
1094 | 0 | luma_stride, |
1095 | 0 | luma_grain_block + luma_offset_y * luma_grain_stride + luma_offset_x + (x ? 2 : 0), |
1096 | 0 | luma_grain_stride, |
1097 | 0 | y_line_buf + ((x ? x + 1 : 0) << 1), |
1098 | 0 | luma_stride, |
1099 | 0 | AOMMIN(luma_subblock_size_x - ((x ? 1 : 0) << 1), width - ((x ? x + 1 : 0) << 1)), |
1100 | 0 | 2); |
1101 | |
|
1102 | 0 | hor_boundary_overlap(cb_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)), |
1103 | 0 | chroma_stride, |
1104 | 0 | cb_grain_block + chroma_offset_y * chroma_grain_stride + chroma_offset_x + |
1105 | 0 | ((x ? 1 : 0) << (1 - chroma_subsamp_x)), |
1106 | 0 | chroma_grain_stride, |
1107 | 0 | cb_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)), |
1108 | 0 | chroma_stride, |
1109 | 0 | AOMMIN(chroma_subblock_size_x - ((x ? 1 : 0) << (1 - chroma_subsamp_x)), |
1110 | 0 | (width - ((x ? x + 1 : 0) << 1)) >> chroma_subsamp_x), |
1111 | 0 | 2 >> chroma_subsamp_y); |
1112 | |
|
1113 | 0 | hor_boundary_overlap(cr_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)), |
1114 | 0 | chroma_stride, |
1115 | 0 | cr_grain_block + chroma_offset_y * chroma_grain_stride + chroma_offset_x + |
1116 | 0 | ((x ? 1 : 0) << (1 - chroma_subsamp_x)), |
1117 | 0 | chroma_grain_stride, |
1118 | 0 | cr_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)), |
1119 | 0 | chroma_stride, |
1120 | 0 | AOMMIN(chroma_subblock_size_x - ((x ? 1 : 0) << (1 - chroma_subsamp_x)), |
1121 | 0 | (width - ((x ? x + 1 : 0) << 1)) >> chroma_subsamp_x), |
1122 | 0 | 2 >> chroma_subsamp_y); |
1123 | |
|
1124 | 0 | if (use_high_bit_depth) { |
1125 | 0 | add_noise_to_block_hbd( |
1126 | 0 | params, |
1127 | 0 | (uint16_t*)luma + (y << 1) * luma_stride + (x << 1), |
1128 | 0 | (uint16_t*)cb + (y << (1 - chroma_subsamp_y)) * chroma_stride + (x << ((1 - chroma_subsamp_x))), |
1129 | 0 | (uint16_t*)cr + (y << (1 - chroma_subsamp_y)) * chroma_stride + (x << ((1 - chroma_subsamp_x))), |
1130 | 0 | luma_stride, |
1131 | 0 | chroma_stride, |
1132 | 0 | y_line_buf + (x << 1), |
1133 | 0 | cb_line_buf + (x << (1 - chroma_subsamp_x)), |
1134 | 0 | cr_line_buf + (x << (1 - chroma_subsamp_x)), |
1135 | 0 | luma_stride, |
1136 | 0 | chroma_stride, |
1137 | 0 | 1, |
1138 | 0 | AOMMIN(luma_subblock_size_x >> 1, width / 2 - x), |
1139 | 0 | bit_depth, |
1140 | 0 | chroma_subsamp_y, |
1141 | 0 | chroma_subsamp_x); |
1142 | 0 | } else { |
1143 | 0 | add_noise_to_block( |
1144 | 0 | params, |
1145 | 0 | luma + (y << 1) * luma_stride + (x << 1), |
1146 | 0 | cb + (y << (1 - chroma_subsamp_y)) * chroma_stride + (x << ((1 - chroma_subsamp_x))), |
1147 | 0 | cr + (y << (1 - chroma_subsamp_y)) * chroma_stride + (x << ((1 - chroma_subsamp_x))), |
1148 | 0 | luma_stride, |
1149 | 0 | chroma_stride, |
1150 | 0 | y_line_buf + (x << 1), |
1151 | 0 | cb_line_buf + (x << (1 - chroma_subsamp_x)), |
1152 | 0 | cr_line_buf + (x << (1 - chroma_subsamp_x)), |
1153 | 0 | luma_stride, |
1154 | 0 | chroma_stride, |
1155 | 0 | 1, |
1156 | 0 | AOMMIN(luma_subblock_size_x >> 1, width / 2 - x), |
1157 | 0 | bit_depth, |
1158 | 0 | chroma_subsamp_y, |
1159 | 0 | chroma_subsamp_x); |
1160 | 0 | } |
1161 | 0 | } |
1162 | |
|
1163 | 0 | int32_t i = overlap && y ? 1 : 0; |
1164 | 0 | int32_t j = overlap && x ? 1 : 0; |
1165 | |
|
1166 | 0 | if (use_high_bit_depth) { |
1167 | 0 | add_noise_to_block_hbd( |
1168 | 0 | params, |
1169 | 0 | (uint16_t*)luma + ((y + i) << 1) * luma_stride + ((x + j) << 1), |
1170 | 0 | (uint16_t*)cb + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride + |
1171 | 0 | ((x + j) << (1 - chroma_subsamp_x)), |
1172 | 0 | (uint16_t*)cr + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride + |
1173 | 0 | ((x + j) << (1 - chroma_subsamp_x)), |
1174 | 0 | luma_stride, |
1175 | 0 | chroma_stride, |
1176 | 0 | luma_grain_block + (luma_offset_y + (i << 1)) * luma_grain_stride + luma_offset_x + (j << 1), |
1177 | 0 | cb_grain_block + (chroma_offset_y + (i << (1 - chroma_subsamp_y))) * chroma_grain_stride + |
1178 | 0 | chroma_offset_x + (j << (1 - chroma_subsamp_x)), |
1179 | 0 | cr_grain_block + (chroma_offset_y + (i << (1 - chroma_subsamp_y))) * chroma_grain_stride + |
1180 | 0 | chroma_offset_x + (j << (1 - chroma_subsamp_x)), |
1181 | 0 | luma_grain_stride, |
1182 | 0 | chroma_grain_stride, |
1183 | 0 | AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i, |
1184 | 0 | AOMMIN(luma_subblock_size_x >> 1, width / 2 - x) - j, |
1185 | 0 | bit_depth, |
1186 | 0 | chroma_subsamp_y, |
1187 | 0 | chroma_subsamp_x); |
1188 | 0 | } else { |
1189 | 0 | add_noise_to_block( |
1190 | 0 | params, |
1191 | 0 | luma + ((y + i) << 1) * luma_stride + ((x + j) << 1), |
1192 | 0 | cb + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride + ((x + j) << (1 - chroma_subsamp_x)), |
1193 | 0 | cr + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride + ((x + j) << (1 - chroma_subsamp_x)), |
1194 | 0 | luma_stride, |
1195 | 0 | chroma_stride, |
1196 | 0 | luma_grain_block + (luma_offset_y + (i << 1)) * luma_grain_stride + luma_offset_x + (j << 1), |
1197 | 0 | cb_grain_block + (chroma_offset_y + (i << (1 - chroma_subsamp_y))) * chroma_grain_stride + |
1198 | 0 | chroma_offset_x + (j << (1 - chroma_subsamp_x)), |
1199 | 0 | cr_grain_block + (chroma_offset_y + (i << (1 - chroma_subsamp_y))) * chroma_grain_stride + |
1200 | 0 | chroma_offset_x + (j << (1 - chroma_subsamp_x)), |
1201 | 0 | luma_grain_stride, |
1202 | 0 | chroma_grain_stride, |
1203 | 0 | AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i, |
1204 | 0 | AOMMIN(luma_subblock_size_x >> 1, width / 2 - x) - j, |
1205 | 0 | bit_depth, |
1206 | 0 | chroma_subsamp_y, |
1207 | 0 | chroma_subsamp_x); |
1208 | 0 | } |
1209 | |
|
1210 | 0 | if (overlap) { |
1211 | 0 | if (x) { |
1212 | | // Copy overlapped column bufer to line buffer |
1213 | 0 | copy_area(y_col_buf + (luma_subblock_size_y << 1), 2, y_line_buf + (x << 1), luma_stride, 2, 2); |
1214 | |
|
1215 | 0 | copy_area(cb_col_buf + (chroma_subblock_size_y << (1 - chroma_subsamp_x)), |
1216 | 0 | 2 >> chroma_subsamp_x, |
1217 | 0 | cb_line_buf + (x << (1 - chroma_subsamp_x)), |
1218 | 0 | chroma_stride, |
1219 | 0 | 2 >> chroma_subsamp_x, |
1220 | 0 | 2 >> chroma_subsamp_y); |
1221 | |
|
1222 | 0 | copy_area(cr_col_buf + (chroma_subblock_size_y << (1 - chroma_subsamp_x)), |
1223 | 0 | 2 >> chroma_subsamp_x, |
1224 | 0 | cr_line_buf + (x << (1 - chroma_subsamp_x)), |
1225 | 0 | chroma_stride, |
1226 | 0 | 2 >> chroma_subsamp_x, |
1227 | 0 | 2 >> chroma_subsamp_y); |
1228 | 0 | } |
1229 | | |
1230 | | // Copy grain to the line buffer for overlap with a bottom block |
1231 | 0 | copy_area(luma_grain_block + (luma_offset_y + luma_subblock_size_y) * luma_grain_stride + |
1232 | 0 | luma_offset_x + ((x ? 2 : 0)), |
1233 | 0 | luma_grain_stride, |
1234 | 0 | y_line_buf + ((x ? x + 1 : 0) << 1), |
1235 | 0 | luma_stride, |
1236 | 0 | AOMMIN(luma_subblock_size_x, width - (x << 1)) - (x ? 2 : 0), |
1237 | 0 | 2); |
1238 | |
|
1239 | 0 | copy_area(cb_grain_block + (chroma_offset_y + chroma_subblock_size_y) * chroma_grain_stride + |
1240 | 0 | chroma_offset_x + (x ? 2 >> chroma_subsamp_x : 0), |
1241 | 0 | chroma_grain_stride, |
1242 | 0 | cb_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)), |
1243 | 0 | chroma_stride, |
1244 | 0 | AOMMIN(chroma_subblock_size_x, ((width - (x << 1)) >> chroma_subsamp_x)) - |
1245 | 0 | (x ? 2 >> chroma_subsamp_x : 0), |
1246 | 0 | 2 >> chroma_subsamp_y); |
1247 | |
|
1248 | 0 | copy_area(cr_grain_block + (chroma_offset_y + chroma_subblock_size_y) * chroma_grain_stride + |
1249 | 0 | chroma_offset_x + (x ? 2 >> chroma_subsamp_x : 0), |
1250 | 0 | chroma_grain_stride, |
1251 | 0 | cr_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)), |
1252 | 0 | chroma_stride, |
1253 | 0 | AOMMIN(chroma_subblock_size_x, ((width - (x << 1)) >> chroma_subsamp_x)) - |
1254 | 0 | (x ? 2 >> chroma_subsamp_x : 0), |
1255 | 0 | 2 >> chroma_subsamp_y); |
1256 | | |
1257 | | // Copy grain to the column buffer for overlap with the next block to |
1258 | | // the right |
1259 | |
|
1260 | 0 | copy_area(luma_grain_block + luma_offset_y * luma_grain_stride + luma_offset_x + luma_subblock_size_x, |
1261 | 0 | luma_grain_stride, |
1262 | 0 | y_col_buf, |
1263 | 0 | 2, |
1264 | 0 | 2, |
1265 | 0 | AOMMIN(luma_subblock_size_y + 2, height - (y << 1))); |
1266 | |
|
1267 | 0 | copy_area( |
1268 | 0 | cb_grain_block + chroma_offset_y * chroma_grain_stride + chroma_offset_x + chroma_subblock_size_x, |
1269 | 0 | chroma_grain_stride, |
1270 | 0 | cb_col_buf, |
1271 | 0 | 2 >> chroma_subsamp_x, |
1272 | 0 | 2 >> chroma_subsamp_x, |
1273 | 0 | AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y), (height - (y << 1)) >> chroma_subsamp_y)); |
1274 | |
|
1275 | 0 | copy_area( |
1276 | 0 | cr_grain_block + chroma_offset_y * chroma_grain_stride + chroma_offset_x + chroma_subblock_size_x, |
1277 | 0 | chroma_grain_stride, |
1278 | 0 | cr_col_buf, |
1279 | 0 | 2 >> chroma_subsamp_x, |
1280 | 0 | 2 >> chroma_subsamp_x, |
1281 | 0 | AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y), (height - (y << 1)) >> chroma_subsamp_y)); |
1282 | 0 | } |
1283 | 0 | } |
1284 | 0 | } |
1285 | |
|
1286 | 0 | dealloc_arrays(params, |
1287 | 0 | &pred_pos_luma, |
1288 | 0 | &pred_pos_chroma, |
1289 | 0 | &luma_grain_block, |
1290 | 0 | &cb_grain_block, |
1291 | 0 | &cr_grain_block, |
1292 | 0 | &y_line_buf, |
1293 | 0 | &cb_line_buf, |
1294 | 0 | &cr_line_buf, |
1295 | 0 | &y_col_buf, |
1296 | 0 | &cb_col_buf, |
1297 | 0 | &cr_col_buf); |
1298 | 0 | } |