Coverage Report

Created: 2026-05-16 06:27

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/aom/av1/decoder/grain_synthesis.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
/*!\file
13
 * \brief Describes film grain parameters and film grain synthesis
14
 *
15
 */
16
17
#include <stdbool.h>
18
#include <stdio.h>
19
#include <string.h>
20
#include <stdlib.h>
21
#include <assert.h>
22
#include "aom_dsp/aom_dsp_common.h"
23
#include "aom_mem/aom_mem.h"
24
#include "av1/decoder/grain_synthesis.h"
25
26
// Samples with Gaussian distribution in the range of [-2048, 2047] (12 bits)
27
// with zero mean and standard deviation of about 512.
28
// Values should be divided by 4 for 10-bit range and by 16 for 8-bit range.
29
static const int gaussian_sequence[2048] = {
30
  56,    568,   -180,  172,   124,   -84,   172,   -64,   -900,  24,   820,
31
  224,   1248,  996,   272,   -8,    -916,  -388,  -732,  -104,  -188, 800,
32
  112,   -652,  -320,  -376,  140,   -252,  492,   -168,  44,    -788, 588,
33
  -584,  500,   -228,  12,    680,   272,   -476,  972,   -100,  652,  368,
34
  432,   -196,  -720,  -192,  1000,  -332,  652,   -136,  -552,  -604, -4,
35
  192,   -220,  -136,  1000,  -52,   372,   -96,   -624,  124,   -24,  396,
36
  540,   -12,   -104,  640,   464,   244,   -208,  -84,   368,   -528, -740,
37
  248,   -968,  -848,  608,   376,   -60,   -292,  -40,   -156,  252,  -292,
38
  248,   224,   -280,  400,   -244,  244,   -60,   76,    -80,   212,  532,
39
  340,   128,   -36,   824,   -352,  -60,   -264,  -96,   -612,  416,  -704,
40
  220,   -204,  640,   -160,  1220,  -408,  900,   336,   20,    -336, -96,
41
  -792,  304,   48,    -28,   -1232, -1172, -448,  104,   -292,  -520, 244,
42
  60,    -948,  0,     -708,  268,   108,   356,   -548,  488,   -344, -136,
43
  488,   -196,  -224,  656,   -236,  -1128, 60,    4,     140,   276,  -676,
44
  -376,  168,   -108,  464,   8,     564,   64,    240,   308,   -300, -400,
45
  -456,  -136,  56,    120,   -408,  -116,  436,   504,   -232,  328,  844,
46
  -164,  -84,   784,   -168,  232,   -224,  348,   -376,  128,   568,  96,
47
  -1244, -288,  276,   848,   832,   -360,  656,   464,   -384,  -332, -356,
48
  728,   -388,  160,   -192,  468,   296,   224,   140,   -776,  -100, 280,
49
  4,     196,   44,    -36,   -648,  932,   16,    1428,  28,    528,  808,
50
  772,   20,    268,   88,    -332,  -284,  124,   -384,  -448,  208,  -228,
51
  -1044, -328,  660,   380,   -148,  -300,  588,   240,   540,   28,   136,
52
  -88,   -436,  256,   296,   -1000, 1400,  0,     -48,   1056,  -136, 264,
53
  -528,  -1108, 632,   -484,  -592,  -344,  796,   124,   -668,  -768, 388,
54
  1296,  -232,  -188,  -200,  -288,  -4,    308,   100,   -168,  256,  -500,
55
  204,   -508,  648,   -136,  372,   -272,  -120,  -1004, -552,  -548, -384,
56
  548,   -296,  428,   -108,  -8,    -912,  -324,  -224,  -88,   -112, -220,
57
  -100,  996,   -796,  548,   360,   -216,  180,   428,   -200,  -212, 148,
58
  96,    148,   284,   216,   -412,  -320,  120,   -300,  -384,  -604, -572,
59
  -332,  -8,    -180,  -176,  696,   116,   -88,   628,   76,    44,   -516,
60
  240,   -208,  -40,   100,   -592,  344,   -308,  -452,  -228,  20,   916,
61
  -1752, -136,  -340,  -804,  140,   40,    512,   340,   248,   184,  -492,
62
  896,   -156,  932,   -628,  328,   -688,  -448,  -616,  -752,  -100, 560,
63
  -1020, 180,   -800,  -64,   76,    576,   1068,  396,   660,   552,  -108,
64
  -28,   320,   -628,  312,   -92,   -92,   -472,  268,   16,    560,  516,
65
  -672,  -52,   492,   -100,  260,   384,   284,   292,   304,   -148, 88,
66
  -152,  1012,  1064,  -228,  164,   -376,  -684,  592,   -392,  156,  196,
67
  -524,  -64,   -884,  160,   -176,  636,   648,   404,   -396,  -436, 864,
68
  424,   -728,  988,   -604,  904,   -592,  296,   -224,  536,   -176, -920,
69
  436,   -48,   1176,  -884,  416,   -776,  -824,  -884,  524,   -548, -564,
70
  -68,   -164,  -96,   692,   364,   -692,  -1012, -68,   260,   -480, 876,
71
  -1116, 452,   -332,  -352,  892,   -1088, 1220,  -676,  12,    -292, 244,
72
  496,   372,   -32,   280,   200,   112,   -440,  -96,   24,    -644, -184,
73
  56,    -432,  224,   -980,  272,   -260,  144,   -436,  420,   356,  364,
74
  -528,  76,    172,   -744,  -368,  404,   -752,  -416,  684,   -688, 72,
75
  540,   416,   92,    444,   480,   -72,   -1416, 164,   -1172, -68,  24,
76
  424,   264,   1040,  128,   -912,  -524,  -356,  64,    876,   -12,  4,
77
  -88,   532,   272,   -524,  320,   276,   -508,  940,   24,    -400, -120,
78
  756,   60,    236,   -412,  100,   376,   -484,  400,   -100,  -740, -108,
79
  -260,  328,   -268,  224,   -200,  -416,  184,   -604,  -564,  -20,  296,
80
  60,    892,   -888,  60,    164,   68,    -760,  216,   -296,  904,  -336,
81
  -28,   404,   -356,  -568,  -208,  -1480, -512,  296,   328,   -360, -164,
82
  -1560, -776,  1156,  -428,  164,   -504,  -112,  120,   -216,  -148, -264,
83
  308,   32,    64,    -72,   72,    116,   176,   -64,   -272,  460,  -536,
84
  -784,  -280,  348,   108,   -752,  -132,  524,   -540,  -776,  116,  -296,
85
  -1196, -288,  -560,  1040,  -472,  116,   -848,  -1116, 116,   636,  696,
86
  284,   -176,  1016,  204,   -864,  -648,  -248,  356,   972,   -584, -204,
87
  264,   880,   528,   -24,   -184,  116,   448,   -144,  828,   524,  212,
88
  -212,  52,    12,    200,   268,   -488,  -404,  -880,  824,   -672, -40,
89
  908,   -248,  500,   716,   -576,  492,   -576,  16,    720,   -108, 384,
90
  124,   344,   280,   576,   -500,  252,   104,   -308,  196,   -188, -8,
91
  1268,  296,   1032,  -1196, 436,   316,   372,   -432,  -200,  -660, 704,
92
  -224,  596,   -132,  268,   32,    -452,  884,   104,   -1008, 424,  -1348,
93
  -280,  4,     -1168, 368,   476,   696,   300,   -8,    24,    180,  -592,
94
  -196,  388,   304,   500,   724,   -160,  244,   -84,   272,   -256, -420,
95
  320,   208,   -144,  -156,  156,   364,   452,   28,    540,   316,  220,
96
  -644,  -248,  464,   72,    360,   32,    -388,  496,   -680,  -48,  208,
97
  -116,  -408,  60,    -604,  -392,  548,   -840,  784,   -460,  656,  -544,
98
  -388,  -264,  908,   -800,  -628,  -612,  -568,  572,   -220,  164,  288,
99
  -16,   -308,  308,   -112,  -636,  -760,  280,   -668,  432,   364,  240,
100
  -196,  604,   340,   384,   196,   592,   -44,   -500,  432,   -580, -132,
101
  636,   -76,   392,   4,     -412,  540,   508,   328,   -356,  -36,  16,
102
  -220,  -64,   -248,  -60,   24,    -192,  368,   1040,  92,    -24,  -1044,
103
  -32,   40,    104,   148,   192,   -136,  -520,  56,    -816,  -224, 732,
104
  392,   356,   212,   -80,   -424,  -1008, -324,  588,   -1496, 576,  460,
105
  -816,  -848,  56,    -580,  -92,   -1372, -112,  -496,  200,   364,  52,
106
  -140,  48,    -48,   -60,   84,    72,    40,    132,   -356,  -268, -104,
107
  -284,  -404,  732,   -520,  164,   -304,  -540,  120,   328,   -76,  -460,
108
  756,   388,   588,   236,   -436,  -72,   -176,  -404,  -316,  -148, 716,
109
  -604,  404,   -72,   -88,   -888,  -68,   944,   88,    -220,  -344, 960,
110
  472,   460,   -232,  704,   120,   832,   -228,  692,   -508,  132,  -476,
111
  844,   -748,  -364,  -44,   1116,  -1104, -1056, 76,    428,   552,  -692,
112
  60,    356,   96,    -384,  -188,  -612,  -576,  736,   508,   892,  352,
113
  -1132, 504,   -24,   -352,  324,   332,   -600,  -312,  292,   508,  -144,
114
  -8,    484,   48,    284,   -260,  -240,  256,   -100,  -292,  -204, -44,
115
  472,   -204,  908,   -188,  -1000, -256,  92,    1164,  -392,  564,  356,
116
  652,   -28,   -884,  256,   484,   -192,  760,   -176,  376,   -524, -452,
117
  -436,  860,   -736,  212,   124,   504,   -476,  468,   76,    -472, 552,
118
  -692,  -944,  -620,  740,   -240,  400,   132,   20,    192,   -196, 264,
119
  -668,  -1012, -60,   296,   -316,  -828,  76,    -156,  284,   -768, -448,
120
  -832,  148,   248,   652,   616,   1236,  288,   -328,  -400,  -124, 588,
121
  220,   520,   -696,  1032,  768,   -740,  -92,   -272,  296,   448,  -464,
122
  412,   -200,  392,   440,   -200,  264,   -152,  -260,  320,   1032, 216,
123
  320,   -8,    -64,   156,   -1016, 1084,  1172,  536,   484,   -432, 132,
124
  372,   -52,   -256,  84,    116,   -352,  48,    116,   304,   -384, 412,
125
  924,   -300,  528,   628,   180,   648,   44,    -980,  -220,  1320, 48,
126
  332,   748,   524,   -268,  -720,  540,   -276,  564,   -344,  -208, -196,
127
  436,   896,   88,    -392,  132,   80,    -964,  -288,  568,   56,   -48,
128
  -456,  888,   8,     552,   -156,  -292,  948,   288,   128,   -716, -292,
129
  1192,  -152,  876,   352,   -600,  -260,  -812,  -468,  -28,   -120, -32,
130
  -44,   1284,  496,   192,   464,   312,   -76,   -516,  -380,  -456, -1012,
131
  -48,   308,   -156,  36,    492,   -156,  -808,  188,   1652,  68,   -120,
132
  -116,  316,   160,   -140,  352,   808,   -416,  592,   316,   -480, 56,
133
  528,   -204,  -568,  372,   -232,  752,   -344,  744,   -4,    324,  -416,
134
  -600,  768,   268,   -248,  -88,   -132,  -420,  -432,  80,    -288, 404,
135
  -316,  -1216, -588,  520,   -108,  92,    -320,  368,   -480,  -216, -92,
136
  1688,  -300,  180,   1020,  -176,  820,   -68,   -228,  -260,  436,  -904,
137
  20,    40,    -508,  440,   -736,  312,   332,   204,   760,   -372, 728,
138
  96,    -20,   -632,  -520,  -560,  336,   1076,  -64,   -532,  776,  584,
139
  192,   396,   -728,  -520,  276,   -188,  80,    -52,   -612,  -252, -48,
140
  648,   212,   -688,  228,   -52,   -260,  428,   -412,  -272,  -404, 180,
141
  816,   -796,  48,    152,   484,   -88,   -216,  988,   696,   188,  -528,
142
  648,   -116,  -180,  316,   476,   12,    -564,  96,    476,   -252, -364,
143
  -376,  -392,  556,   -256,  -576,  260,   -352,  120,   -16,   -136, -260,
144
  -492,  72,    556,   660,   580,   616,   772,   436,   424,   -32,  -324,
145
  -1268, 416,   -324,  -80,   920,   160,   228,   724,   32,    -516, 64,
146
  384,   68,    -128,  136,   240,   248,   -204,  -68,   252,   -932, -120,
147
  -480,  -628,  -84,   192,   852,   -404,  -288,  -132,  204,   100,  168,
148
  -68,   -196,  -868,  460,   1080,  380,   -80,   244,   0,     484,  -888,
149
  64,    184,   352,   600,   460,   164,   604,   -196,  320,   -64,  588,
150
  -184,  228,   12,    372,   48,    -848,  -344,  224,   208,   -200, 484,
151
  128,   -20,   272,   -468,  -840,  384,   256,   -720,  -520,  -464, -580,
152
  112,   -120,  644,   -356,  -208,  -608,  -528,  704,   560,   -424, 392,
153
  828,   40,    84,    200,   -152,  0,     -144,  584,   280,   -120, 80,
154
  -556,  -972,  -196,  -472,  724,   80,    168,   -32,   88,    160,  -688,
155
  0,     160,   356,   372,   -776,  740,   -128,  676,   -248,  -480, 4,
156
  -364,  96,    544,   232,   -1032, 956,   236,   356,   20,    -40,  300,
157
  24,    -676,  -596,  132,   1120,  -104,  532,   -1096, 568,   648,  444,
158
  508,   380,   188,   -376,  -604,  1488,  424,   24,    756,   -220, -192,
159
  716,   120,   920,   688,   168,   44,    -460,  568,   284,   1144, 1160,
160
  600,   424,   888,   656,   -356,  -320,  220,   316,   -176,  -724, -188,
161
  -816,  -628,  -348,  -228,  -380,  1012,  -452,  -660,  736,   928,  404,
162
  -696,  -72,   -268,  -892,  128,   184,   -344,  -780,  360,   336,  400,
163
  344,   428,   548,   -112,  136,   -228,  -216,  -820,  -516,  340,  92,
164
  -136,  116,   -300,  376,   -244,  100,   -316,  -520,  -284,  -12,  824,
165
  164,   -548,  -180,  -128,  116,   -924,  -828,  268,   -368,  -580, 620,
166
  192,   160,   0,     -1676, 1068,  424,   -56,   -360,  468,   -156, 720,
167
  288,   -528,  556,   -364,  548,   -148,  504,   316,   152,   -648, -620,
168
  -684,  -24,   -376,  -384,  -108,  -920,  -1032, 768,   180,   -264, -508,
169
  -1268, -260,  -60,   300,   -240,  988,   724,   -376,  -576,  -212, -736,
170
  556,   192,   1092,  -620,  -880,  376,   -56,   -4,    -216,  -32,  836,
171
  268,   396,   1332,  864,   -600,  100,   56,    -412,  -92,   356,  180,
172
  884,   -468,  -436,  292,   -388,  -804,  -704,  -840,  368,   -348, 140,
173
  -724,  1536,  940,   372,   112,   -372,  436,   -480,  1136,  296,  -32,
174
  -228,  132,   -48,   -220,  868,   -1016, -60,   -1044, -464,  328,  916,
175
  244,   12,    -736,  -296,  360,   468,   -376,  -108,  -92,   788,  368,
176
  -56,   544,   400,   -672,  -420,  728,   16,    320,   44,    -284, -380,
177
  -796,  488,   132,   204,   -596,  -372,  88,    -152,  -908,  -636, -572,
178
  -624,  -116,  -692,  -200,  -56,   276,   -88,   484,   -324,  948,  864,
179
  1000,  -456,  -184,  -276,  292,   -296,  156,   676,   320,   160,  908,
180
  -84,   -1236, -288,  -116,  260,   -372,  -644,  732,   -756,  -96,  84,
181
  344,   -520,  348,   -688,  240,   -84,   216,   -1044, -136,  -676, -396,
182
  -1500, 960,   -40,   176,   168,   1516,  420,   -504,  -344,  -364, -360,
183
  1216,  -940,  -380,  -212,  252,   -660,  -708,  484,   -444,  -152, 928,
184
  -120,  1112,  476,   -260,  560,   -148,  -344,  108,   -196,  228,  -288,
185
  504,   560,   -328,  -88,   288,   -1008, 460,   -228,  468,   -836, -196,
186
  76,    388,   232,   412,   -1168, -716,  -644,  756,   -172,  -356, -504,
187
  116,   432,   528,   48,    476,   -168,  -608,  448,   160,   -532, -272,
188
  28,    -676,  -12,   828,   980,   456,   520,   104,   -104,  256,  -344,
189
  -4,    -28,   -368,  -52,   -524,  -572,  -556,  -200,  768,   1124, -208,
190
  -512,  176,   232,   248,   -148,  -888,  604,   -600,  -304,  804,  -156,
191
  -212,  488,   -192,  -804,  -256,  368,   -360,  -916,  -328,  228,  -240,
192
  -448,  -472,  856,   -556,  -364,  572,   -12,   -156,  -368,  -340, 432,
193
  252,   -752,  -152,  288,   268,   -580,  -848,  -592,  108,   -76,  244,
194
  312,   -716,  592,   -80,   436,   360,   4,     -248,  160,   516,  584,
195
  732,   44,    -468,  -280,  -292,  -156,  -588,  28,    308,   912,  24,
196
  124,   156,   180,   -252,  944,   -924,  -772,  -520,  -428,  -624, 300,
197
  -212,  -1144, 32,    -724,  800,   -1128, -212,  -1288, -848,  180,  -416,
198
  440,   192,   -576,  -792,  -76,   -1080, 80,    -532,  -352,  -132, 380,
199
  -820,  148,   1112,  128,   164,   456,   700,   -924,  144,   -668, -384,
200
  648,   -832,  508,   552,   -52,   -100,  -656,  208,   -568,  748,  -88,
201
  680,   232,   300,   192,   -408,  -1012, -152,  -252,  -268,  272,  -876,
202
  -664,  -648,  -332,  -136,  16,    12,    1152,  -28,   332,   -536, 320,
203
  -672,  -460,  -316,  532,   -260,  228,   -40,   1052,  -816,  180,  88,
204
  -496,  -556,  -672,  -368,  428,   92,    356,   404,   -408,  252,  196,
205
  -176,  -556,  792,   268,   32,    372,   40,    96,    -332,  328,  120,
206
  372,   -900,  -40,   472,   -264,  -592,  952,   128,   656,   112,  664,
207
  -232,  420,   4,     -344,  -464,  556,   244,   -416,  -32,   252,  0,
208
  -412,  188,   -696,  508,   -476,  324,   -1096, 656,   -312,  560,  264,
209
  -136,  304,   160,   -64,   -580,  248,   336,   -720,  560,   -348, -288,
210
  -276,  -196,  -500,  852,   -544,  -236,  -1128, -992,  -776,  116,  56,
211
  52,    860,   884,   212,   -12,   168,   1020,  512,   -552,  924,  -148,
212
  716,   188,   164,   -340,  -520,  -184,  880,   -152,  -680,  -208, -1156,
213
  -300,  -528,  -472,  364,   100,   -744,  -1056, -32,   540,   280,  144,
214
  -676,  -32,   -232,  -280,  -224,  96,    568,   -76,   172,   148,  148,
215
  104,   32,    -296,  -32,   788,   -80,   32,    -16,   280,   288,  944,
216
  428,   -484
217
};
218
219
static const int gauss_bits = 11;
220
221
static const int luma_subblock_size_y = 32;
222
static const int luma_subblock_size_x = 32;
223
224
static const int min_luma_legal_range = 16;
225
static const int max_luma_legal_range = 235;
226
227
static const int min_chroma_legal_range = 16;
228
static const int max_chroma_legal_range = 240;
229
230
typedef struct {
231
  int y[256];
232
  int cb[256];
233
  int cr[256];
234
} aom_grain_scaling_lut_t;
235
236
typedef struct {
237
  uint16_t random_register;  // random number generator register
238
} aom_grain_rng_t;
239
240
static void dealloc_arrays(const aom_film_grain_t *params, int ***pred_pos_luma,
241
                           int ***pred_pos_chroma, int **luma_grain_block,
242
                           int **cb_grain_block, int **cr_grain_block,
243
                           int **y_line_buf, int **cb_line_buf,
244
                           int **cr_line_buf, int **y_col_buf, int **cb_col_buf,
245
8.82k
                           int **cr_col_buf) {
246
8.82k
  int num_pos_luma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
247
8.82k
  int num_pos_chroma = num_pos_luma;
248
8.82k
  if (params->num_y_points > 0) ++num_pos_chroma;
249
250
8.82k
  if (*pred_pos_luma) {
251
27.3k
    for (int row = 0; row < num_pos_luma; row++) {
252
18.5k
      aom_free((*pred_pos_luma)[row]);
253
18.5k
    }
254
8.82k
    aom_free(*pred_pos_luma);
255
8.82k
    *pred_pos_luma = NULL;
256
8.82k
  }
257
258
8.82k
  if (*pred_pos_chroma) {
259
34.2k
    for (int row = 0; row < num_pos_chroma; row++) {
260
25.4k
      aom_free((*pred_pos_chroma)[row]);
261
25.4k
    }
262
8.82k
    aom_free(*pred_pos_chroma);
263
8.82k
    *pred_pos_chroma = NULL;
264
8.82k
  }
265
266
8.82k
  aom_free(*y_line_buf);
267
8.82k
  *y_line_buf = NULL;
268
269
8.82k
  aom_free(*cb_line_buf);
270
8.82k
  *cb_line_buf = NULL;
271
272
8.82k
  aom_free(*cr_line_buf);
273
8.82k
  *cr_line_buf = NULL;
274
275
8.82k
  aom_free(*y_col_buf);
276
8.82k
  *y_col_buf = NULL;
277
278
8.82k
  aom_free(*cb_col_buf);
279
8.82k
  *cb_col_buf = NULL;
280
281
8.82k
  aom_free(*cr_col_buf);
282
8.82k
  *cr_col_buf = NULL;
283
284
8.82k
  aom_free(*luma_grain_block);
285
8.82k
  *luma_grain_block = NULL;
286
287
8.82k
  aom_free(*cb_grain_block);
288
8.82k
  *cb_grain_block = NULL;
289
290
8.82k
  aom_free(*cr_grain_block);
291
8.82k
  *cr_grain_block = NULL;
292
8.82k
}
293
294
static bool init_arrays(const aom_film_grain_t *params, int luma_stride,
295
                        int chroma_stride, int ***pred_pos_luma_p,
296
                        int ***pred_pos_chroma_p, int **luma_grain_block,
297
                        int **cb_grain_block, int **cr_grain_block,
298
                        int **y_line_buf, int **cb_line_buf, int **cr_line_buf,
299
                        int **y_col_buf, int **cb_col_buf, int **cr_col_buf,
300
                        int luma_grain_samples, int chroma_grain_samples,
301
8.82k
                        int chroma_subsamp_y, int chroma_subsamp_x) {
302
8.82k
  *pred_pos_luma_p = NULL;
303
8.82k
  *pred_pos_chroma_p = NULL;
304
8.82k
  *luma_grain_block = NULL;
305
8.82k
  *cb_grain_block = NULL;
306
8.82k
  *cr_grain_block = NULL;
307
8.82k
  *y_line_buf = NULL;
308
8.82k
  *cb_line_buf = NULL;
309
8.82k
  *cr_line_buf = NULL;
310
8.82k
  *y_col_buf = NULL;
311
8.82k
  *cb_col_buf = NULL;
312
8.82k
  *cr_col_buf = NULL;
313
314
8.82k
  const int chroma_subblock_size_y = luma_subblock_size_y >> chroma_subsamp_y;
315
316
8.82k
  int num_pos_luma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
317
8.82k
  int num_pos_chroma = num_pos_luma;
318
8.82k
  if (params->num_y_points > 0) ++num_pos_chroma;
319
320
8.82k
  int **pred_pos_luma;
321
8.82k
  int **pred_pos_chroma;
322
323
8.82k
  pred_pos_luma = (int **)aom_calloc(num_pos_luma, sizeof(*pred_pos_luma));
324
8.82k
  if (!pred_pos_luma) return false;
325
326
27.3k
  for (int row = 0; row < num_pos_luma; row++) {
327
18.5k
    pred_pos_luma[row] = (int *)aom_malloc(sizeof(**pred_pos_luma) * 3);
328
18.5k
    if (!pred_pos_luma[row]) {
329
0
      dealloc_arrays(params, pred_pos_luma_p, pred_pos_chroma_p,
330
0
                     luma_grain_block, cb_grain_block, cr_grain_block,
331
0
                     y_line_buf, cb_line_buf, cr_line_buf, y_col_buf,
332
0
                     cb_col_buf, cr_col_buf);
333
0
      return false;
334
0
    }
335
18.5k
  }
336
337
8.82k
  pred_pos_chroma =
338
8.82k
      (int **)aom_calloc(num_pos_chroma, sizeof(*pred_pos_chroma));
339
8.82k
  if (!pred_pos_chroma) {
340
0
    dealloc_arrays(params, pred_pos_luma_p, pred_pos_chroma_p, luma_grain_block,
341
0
                   cb_grain_block, cr_grain_block, y_line_buf, cb_line_buf,
342
0
                   cr_line_buf, y_col_buf, cb_col_buf, cr_col_buf);
343
0
    return false;
344
0
  }
345
346
34.2k
  for (int row = 0; row < num_pos_chroma; row++) {
347
25.4k
    pred_pos_chroma[row] = (int *)aom_malloc(sizeof(**pred_pos_chroma) * 3);
348
25.4k
    if (!pred_pos_chroma[row]) {
349
0
      dealloc_arrays(params, pred_pos_luma_p, pred_pos_chroma_p,
350
0
                     luma_grain_block, cb_grain_block, cr_grain_block,
351
0
                     y_line_buf, cb_line_buf, cr_line_buf, y_col_buf,
352
0
                     cb_col_buf, cr_col_buf);
353
0
      return false;
354
0
    }
355
25.4k
  }
356
357
8.82k
  int pos_ar_index = 0;
358
359
12.5k
  for (int row = -params->ar_coeff_lag; row < 0; row++) {
360
18.5k
    for (int col = -params->ar_coeff_lag; col < params->ar_coeff_lag + 1;
361
14.7k
         col++) {
362
14.7k
      pred_pos_luma[pos_ar_index][0] = row;
363
14.7k
      pred_pos_luma[pos_ar_index][1] = col;
364
14.7k
      pred_pos_luma[pos_ar_index][2] = 0;
365
366
14.7k
      pred_pos_chroma[pos_ar_index][0] = row;
367
14.7k
      pred_pos_chroma[pos_ar_index][1] = col;
368
14.7k
      pred_pos_chroma[pos_ar_index][2] = 0;
369
14.7k
      ++pos_ar_index;
370
14.7k
    }
371
3.76k
  }
372
373
12.5k
  for (int col = -params->ar_coeff_lag; col < 0; col++) {
374
3.76k
    pred_pos_luma[pos_ar_index][0] = 0;
375
3.76k
    pred_pos_luma[pos_ar_index][1] = col;
376
3.76k
    pred_pos_luma[pos_ar_index][2] = 0;
377
378
3.76k
    pred_pos_chroma[pos_ar_index][0] = 0;
379
3.76k
    pred_pos_chroma[pos_ar_index][1] = col;
380
3.76k
    pred_pos_chroma[pos_ar_index][2] = 0;
381
382
3.76k
    ++pos_ar_index;
383
3.76k
  }
384
385
8.82k
  if (params->num_y_points > 0) {
386
6.88k
    pred_pos_chroma[pos_ar_index][0] = 0;
387
6.88k
    pred_pos_chroma[pos_ar_index][1] = 0;
388
6.88k
    pred_pos_chroma[pos_ar_index][2] = 1;
389
6.88k
  }
390
391
8.82k
  *pred_pos_luma_p = pred_pos_luma;
392
8.82k
  *pred_pos_chroma_p = pred_pos_chroma;
393
394
8.82k
  *y_line_buf = (int *)aom_malloc(sizeof(**y_line_buf) * luma_stride * 2);
395
8.82k
  *cb_line_buf = (int *)aom_malloc(sizeof(**cb_line_buf) * chroma_stride *
396
8.82k
                                   (2 >> chroma_subsamp_y));
397
8.82k
  *cr_line_buf = (int *)aom_malloc(sizeof(**cr_line_buf) * chroma_stride *
398
8.82k
                                   (2 >> chroma_subsamp_y));
399
400
8.82k
  *y_col_buf =
401
8.82k
      (int *)aom_malloc(sizeof(**y_col_buf) * (luma_subblock_size_y + 2) * 2);
402
8.82k
  *cb_col_buf =
403
8.82k
      (int *)aom_malloc(sizeof(**cb_col_buf) *
404
8.82k
                        (chroma_subblock_size_y + (2 >> chroma_subsamp_y)) *
405
8.82k
                        (2 >> chroma_subsamp_x));
406
8.82k
  *cr_col_buf =
407
8.82k
      (int *)aom_malloc(sizeof(**cr_col_buf) *
408
8.82k
                        (chroma_subblock_size_y + (2 >> chroma_subsamp_y)) *
409
8.82k
                        (2 >> chroma_subsamp_x));
410
411
8.82k
  *luma_grain_block =
412
8.82k
      (int *)aom_malloc(sizeof(**luma_grain_block) * luma_grain_samples);
413
8.82k
  *cb_grain_block =
414
8.82k
      (int *)aom_malloc(sizeof(**cb_grain_block) * chroma_grain_samples);
415
8.82k
  *cr_grain_block =
416
8.82k
      (int *)aom_malloc(sizeof(**cr_grain_block) * chroma_grain_samples);
417
8.82k
  if (!(*pred_pos_luma_p && *pred_pos_chroma_p && *y_line_buf && *cb_line_buf &&
418
8.82k
        *cr_line_buf && *y_col_buf && *cb_col_buf && *cr_col_buf &&
419
8.82k
        *luma_grain_block && *cb_grain_block && *cr_grain_block)) {
420
0
    dealloc_arrays(params, pred_pos_luma_p, pred_pos_chroma_p, luma_grain_block,
421
0
                   cb_grain_block, cr_grain_block, y_line_buf, cb_line_buf,
422
0
                   cr_line_buf, y_col_buf, cb_col_buf, cr_col_buf);
423
0
    return false;
424
0
  }
425
8.82k
  return true;
426
8.82k
}
427
428
// get a number between 0 and 2^bits - 1
429
73.4M
static inline int get_random_number(aom_grain_rng_t *rng, int bits) {
430
73.4M
  uint16_t bit;
431
73.4M
  bit = ((rng->random_register >> 0) ^ (rng->random_register >> 1) ^
432
73.4M
         (rng->random_register >> 3) ^ (rng->random_register >> 12)) &
433
73.4M
        1;
434
73.4M
  rng->random_register = (rng->random_register >> 1) | (bit << 15);
435
73.4M
  return (rng->random_register >> (16 - bits)) & ((1 << bits) - 1);
436
73.4M
}
437
438
static void init_random_generator(aom_grain_rng_t *rng, int luma_line,
439
26.5k
                                  uint16_t seed) {
440
  // same for the picture
441
442
26.5k
  uint16_t msb = (seed >> 8) & 255;
443
26.5k
  uint16_t lsb = seed & 255;
444
445
26.5k
  rng->random_register = (msb << 8) + lsb;
446
447
  //  changes for each row
448
26.5k
  int luma_num = luma_line >> 5;
449
450
26.5k
  rng->random_register ^= ((luma_num * 37 + 178) & 255) << 8;
451
26.5k
  rng->random_register ^= ((luma_num * 173 + 105) & 255);
452
26.5k
}
453
454
static void generate_luma_grain_block(
455
    const aom_film_grain_t *params, aom_grain_rng_t *rng, int **pred_pos_luma,
456
    int *luma_grain_block, int luma_block_size_y, int luma_block_size_x,
457
    int luma_grain_stride, int left_pad, int top_pad, int right_pad,
458
8.82k
    int bottom_pad) {
459
8.82k
  if (params->num_y_points == 0) {
460
1.94k
    memset(luma_grain_block, 0,
461
1.94k
           sizeof(*luma_grain_block) * luma_block_size_y * luma_grain_stride);
462
1.94k
    return;
463
1.94k
  }
464
465
6.88k
  int bit_depth = params->bit_depth;
466
6.88k
  int gauss_sec_shift = 12 - bit_depth + params->grain_scale_shift;
467
468
6.88k
  int num_pos_luma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
469
6.88k
  int rounding_offset = (1 << (params->ar_coeff_shift - 1));
470
471
6.88k
  const int grain_min = -(1 << (bit_depth - 1));
472
6.88k
  const int grain_max = (1 << (bit_depth - 1)) - 1;
473
474
509k
  for (int i = 0; i < luma_block_size_y; i++)
475
41.6M
    for (int j = 0; j < luma_block_size_x; j++)
476
41.1M
      luma_grain_block[i * luma_grain_stride + j] =
477
41.1M
          (gaussian_sequence[get_random_number(rng, gauss_bits)] +
478
41.1M
           ((1 << gauss_sec_shift) >> 1)) >>
479
41.1M
          gauss_sec_shift;
480
481
488k
  for (int i = top_pad; i < luma_block_size_y - bottom_pad; i++)
482
37.0M
    for (int j = left_pad; j < luma_block_size_x - right_pad; j++) {
483
36.6M
      int wsum = 0;
484
88.1M
      for (int pos = 0; pos < num_pos_luma; pos++) {
485
51.5M
        wsum = wsum + params->ar_coeffs_y[pos] *
486
51.5M
                          luma_grain_block[(i + pred_pos_luma[pos][0]) *
487
51.5M
                                               luma_grain_stride +
488
51.5M
                                           j + pred_pos_luma[pos][1]];
489
51.5M
      }
490
36.6M
      luma_grain_block[i * luma_grain_stride + j] =
491
36.6M
          clamp(luma_grain_block[i * luma_grain_stride + j] +
492
36.6M
                    ((wsum + rounding_offset) >> params->ar_coeff_shift),
493
36.6M
                grain_min, grain_max);
494
36.6M
    }
495
6.88k
}
496
497
static bool generate_chroma_grain_blocks(
498
    const aom_film_grain_t *params, aom_grain_rng_t *rng, int **pred_pos_chroma,
499
    int *luma_grain_block, int *cb_grain_block, int *cr_grain_block,
500
    int luma_grain_stride, int chroma_block_size_y, int chroma_block_size_x,
501
    int chroma_grain_stride, int left_pad, int top_pad, int right_pad,
502
8.82k
    int bottom_pad, int chroma_subsamp_y, int chroma_subsamp_x) {
503
8.82k
  int bit_depth = params->bit_depth;
504
8.82k
  int gauss_sec_shift = 12 - bit_depth + params->grain_scale_shift;
505
506
8.82k
  int num_pos_chroma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
507
8.82k
  if (params->num_y_points > 0) ++num_pos_chroma;
508
8.82k
  int rounding_offset = (1 << (params->ar_coeff_shift - 1));
509
8.82k
  int chroma_grain_block_size = chroma_block_size_y * chroma_grain_stride;
510
511
8.82k
  const int grain_min = -(1 << (bit_depth - 1));
512
8.82k
  const int grain_max = (1 << (bit_depth - 1)) - 1;
513
514
8.82k
  if (params->num_cb_points || params->chroma_scaling_from_luma) {
515
5.85k
    init_random_generator(rng, 7 << 5, params->random_seed);
516
517
307k
    for (int i = 0; i < chroma_block_size_y; i++)
518
15.7M
      for (int j = 0; j < chroma_block_size_x; j++)
519
15.4M
        cb_grain_block[i * chroma_grain_stride + j] =
520
15.4M
            (gaussian_sequence[get_random_number(rng, gauss_bits)] +
521
15.4M
             ((1 << gauss_sec_shift) >> 1)) >>
522
15.4M
            gauss_sec_shift;
523
5.85k
  } else {
524
2.96k
    memset(cb_grain_block, 0,
525
2.96k
           sizeof(*cb_grain_block) * chroma_grain_block_size);
526
2.96k
  }
527
528
8.82k
  if (params->num_cr_points || params->chroma_scaling_from_luma) {
529
6.04k
    init_random_generator(rng, 11 << 5, params->random_seed);
530
531
320k
    for (int i = 0; i < chroma_block_size_y; i++)
532
17.1M
      for (int j = 0; j < chroma_block_size_x; j++)
533
16.8M
        cr_grain_block[i * chroma_grain_stride + j] =
534
16.8M
            (gaussian_sequence[get_random_number(rng, gauss_bits)] +
535
16.8M
             ((1 << gauss_sec_shift) >> 1)) >>
536
16.8M
            gauss_sec_shift;
537
6.04k
  } else {
538
2.78k
    memset(cr_grain_block, 0,
539
2.78k
           sizeof(*cr_grain_block) * chroma_grain_block_size);
540
2.78k
  }
541
542
478k
  for (int i = top_pad; i < chroma_block_size_y - bottom_pad; i++)
543
26.4M
    for (int j = left_pad; j < chroma_block_size_x - right_pad; j++) {
544
25.9M
      int wsum_cb = 0;
545
25.9M
      int wsum_cr = 0;
546
76.2M
      for (int pos = 0; pos < num_pos_chroma; pos++) {
547
50.2M
        if (pred_pos_chroma[pos][2] == 0) {
548
28.7M
          wsum_cb = wsum_cb + params->ar_coeffs_cb[pos] *
549
28.7M
                                  cb_grain_block[(i + pred_pos_chroma[pos][0]) *
550
28.7M
                                                     chroma_grain_stride +
551
28.7M
                                                 j + pred_pos_chroma[pos][1]];
552
28.7M
          wsum_cr = wsum_cr + params->ar_coeffs_cr[pos] *
553
28.7M
                                  cr_grain_block[(i + pred_pos_chroma[pos][0]) *
554
28.7M
                                                     chroma_grain_stride +
555
28.7M
                                                 j + pred_pos_chroma[pos][1]];
556
28.7M
        } else if (pred_pos_chroma[pos][2] == 1) {
557
21.5M
          int av_luma = 0;
558
21.5M
          int luma_coord_y = ((i - top_pad) << chroma_subsamp_y) + top_pad;
559
21.5M
          int luma_coord_x = ((j - left_pad) << chroma_subsamp_x) + left_pad;
560
561
47.8M
          for (int k = luma_coord_y; k < luma_coord_y + chroma_subsamp_y + 1;
562
26.3M
               k++)
563
62.9M
            for (int l = luma_coord_x; l < luma_coord_x + chroma_subsamp_x + 1;
564
36.6M
                 l++)
565
36.6M
              av_luma += luma_grain_block[k * luma_grain_stride + l];
566
567
21.5M
          av_luma =
568
21.5M
              (av_luma + ((1 << (chroma_subsamp_y + chroma_subsamp_x)) >> 1)) >>
569
21.5M
              (chroma_subsamp_y + chroma_subsamp_x);
570
571
21.5M
          wsum_cb = wsum_cb + params->ar_coeffs_cb[pos] * av_luma;
572
21.5M
          wsum_cr = wsum_cr + params->ar_coeffs_cr[pos] * av_luma;
573
21.5M
        } else {
574
0
          fprintf(
575
0
              stderr,
576
0
              "Grain synthesis: prediction between two chroma components is "
577
0
              "not supported!");
578
0
          return false;
579
0
        }
580
50.2M
      }
581
25.9M
      if (params->num_cb_points || params->chroma_scaling_from_luma)
582
12.8M
        cb_grain_block[i * chroma_grain_stride + j] =
583
12.8M
            clamp(cb_grain_block[i * chroma_grain_stride + j] +
584
12.8M
                      ((wsum_cb + rounding_offset) >> params->ar_coeff_shift),
585
12.8M
                  grain_min, grain_max);
586
25.9M
      if (params->num_cr_points || params->chroma_scaling_from_luma)
587
14.1M
        cr_grain_block[i * chroma_grain_stride + j] =
588
14.1M
            clamp(cr_grain_block[i * chroma_grain_stride + j] +
589
14.1M
                      ((wsum_cr + rounding_offset) >> params->ar_coeff_shift),
590
14.1M
                  grain_min, grain_max);
591
25.9M
    }
592
8.82k
  return true;
593
8.82k
}
594
595
/* Builds a 256-entry scaling look-up table from a piecewise-linear function.
 *
 * scaling_points  control points as (x, y) pairs; x values are expected to be
 *                 increasing
 * num_points      number of valid entries in scaling_points
 * scaling_lut     output table; indices up to 255 are written
 *
 * Entries below the first point's x take the first point's y, entries from
 * the last point's x up to 255 take the last point's y, and entries between
 * two consecutive points are linearly interpolated. Nothing is written when
 * num_points is zero or negative.
 */
static void init_scaling_function(const int scaling_points[][2], int num_points,
                                  int scaling_lut[]) {
  // A negative count would otherwise index scaling_points[num_points - 1]
  // out of bounds in the trailing fill below.
  if (num_points <= 0) return;

  // Flat segment in front of the first control point.
  for (int i = 0; i < scaling_points[0][0]; i++)
    scaling_lut[i] = scaling_points[0][1];

  for (int point = 0; point < num_points - 1; point++) {
    const int delta_y = scaling_points[point + 1][1] - scaling_points[point][1];
    const int delta_x = scaling_points[point + 1][0] - scaling_points[point][0];

    // A segment with no horizontal extent has no entries to fill; skipping it
    // also avoids the division by zero in the slope computation.
    if (delta_x <= 0) continue;

    // Slope scaled by 2^16, using a rounded reciprocal of delta_x.
    const int64_t delta = delta_y * ((65536 + (delta_x >> 1)) / delta_x);

    for (int x = 0; x < delta_x; x++) {
      scaling_lut[scaling_points[point][0] + x] =
          scaling_points[point][1] + (int)((x * delta + 32768) >> 16);
    }
  }

  // Flat segment from the last control point to the end of the table.
  for (int i = scaling_points[num_points - 1][0]; i < 256; i++)
    scaling_lut[i] = scaling_points[num_points - 1][1];
}
617
618
// Reads a scaling value from a look-up table. The sample value is reduced to
// a table position by dropping its (bit_depth - 8) low bits; for bit depths
// above 8 those dropped bits are used to interpolate linearly (with rounding)
// between the entry at that position and the next one. The last entry (255)
// is returned as-is since it has no successor.
static int scale_LUT(const int *scaling_lut, int index, int bit_depth) {
  const int extra_bits = bit_depth - 8;
  const int x = index >> extra_bits;

  if (extra_bits == 0 || x == 255) return scaling_lut[x];

  const int frac = index & ((1 << extra_bits) - 1);
  const int step = scaling_lut[x + 1] - scaling_lut[x];
  return scaling_lut[x] +
         ((step * frac + (1 << (extra_bits - 1))) >> extra_bits);
}
631
632
static void add_noise_to_block(const aom_film_grain_t *params,
633
                               const aom_grain_scaling_lut_t *scaling_lut,
634
                               uint8_t *luma, uint8_t *cb, uint8_t *cr,
635
                               int luma_stride, int chroma_stride,
636
                               int *luma_grain, int *cb_grain, int *cr_grain,
637
                               int luma_grain_stride, int chroma_grain_stride,
638
                               int half_luma_height, int half_luma_width,
639
                               int bit_depth, int chroma_subsamp_y,
640
42.5k
                               int chroma_subsamp_x, int mc_identity) {
641
42.5k
  int cb_mult = params->cb_mult - 128;            // fixed scale
642
42.5k
  int cb_luma_mult = params->cb_luma_mult - 128;  // fixed scale
643
42.5k
  int cb_offset = params->cb_offset - 256;
644
645
42.5k
  int cr_mult = params->cr_mult - 128;            // fixed scale
646
42.5k
  int cr_luma_mult = params->cr_luma_mult - 128;  // fixed scale
647
42.5k
  int cr_offset = params->cr_offset - 256;
648
649
42.5k
  int rounding_offset = (1 << (params->scaling_shift - 1));
650
651
42.5k
  int apply_y = params->num_y_points > 0 ? 1 : 0;
652
42.5k
  int apply_cb =
653
42.5k
      (params->num_cb_points > 0 || params->chroma_scaling_from_luma) ? 1 : 0;
654
42.5k
  int apply_cr =
655
42.5k
      (params->num_cr_points > 0 || params->chroma_scaling_from_luma) ? 1 : 0;
656
657
42.5k
  if (params->chroma_scaling_from_luma) {
658
3.42k
    cb_mult = 0;        // fixed scale
659
3.42k
    cb_luma_mult = 64;  // fixed scale
660
3.42k
    cb_offset = 0;
661
662
3.42k
    cr_mult = 0;        // fixed scale
663
3.42k
    cr_luma_mult = 64;  // fixed scale
664
3.42k
    cr_offset = 0;
665
3.42k
  }
666
667
42.5k
  int min_luma, max_luma, min_chroma, max_chroma;
668
669
42.5k
  if (params->clip_to_restricted_range) {
670
20.9k
    min_luma = min_luma_legal_range;
671
20.9k
    max_luma = max_luma_legal_range;
672
673
20.9k
    if (mc_identity) {
674
10.6k
      min_chroma = min_luma_legal_range;
675
10.6k
      max_chroma = max_luma_legal_range;
676
10.6k
    } else {
677
10.3k
      min_chroma = min_chroma_legal_range;
678
10.3k
      max_chroma = max_chroma_legal_range;
679
10.3k
    }
680
21.5k
  } else {
681
21.5k
    min_luma = min_chroma = 0;
682
21.5k
    max_luma = max_chroma = 255;
683
21.5k
  }
684
685
585k
  for (int i = 0; i < (half_luma_height << (1 - chroma_subsamp_y)); i++) {
686
7.93M
    for (int j = 0; j < (half_luma_width << (1 - chroma_subsamp_x)); j++) {
687
7.38M
      int average_luma = 0;
688
7.38M
      if (chroma_subsamp_x) {
689
3.43M
        average_luma = (luma[(i << chroma_subsamp_y) * luma_stride +
690
3.43M
                             (j << chroma_subsamp_x)] +
691
3.43M
                        luma[(i << chroma_subsamp_y) * luma_stride +
692
3.43M
                             (j << chroma_subsamp_x) + 1] +
693
3.43M
                        1) >>
694
3.43M
                       1;
695
3.95M
      } else {
696
3.95M
        average_luma = luma[(i << chroma_subsamp_y) * luma_stride + j];
697
3.95M
      }
698
699
7.38M
      if (apply_cb) {
700
577k
        cb[i * chroma_stride + j] = clamp(
701
577k
            cb[i * chroma_stride + j] +
702
577k
                ((scale_LUT(scaling_lut->cb,
703
577k
                            clamp(((average_luma * cb_luma_mult +
704
577k
                                    cb_mult * cb[i * chroma_stride + j]) >>
705
577k
                                   6) +
706
577k
                                      cb_offset,
707
577k
                                  0, (256 << (bit_depth - 8)) - 1),
708
577k
                            8) *
709
577k
                      cb_grain[i * chroma_grain_stride + j] +
710
577k
                  rounding_offset) >>
711
577k
                 params->scaling_shift),
712
577k
            min_chroma, max_chroma);
713
577k
      }
714
715
7.38M
      if (apply_cr) {
716
584k
        cr[i * chroma_stride + j] = clamp(
717
584k
            cr[i * chroma_stride + j] +
718
584k
                ((scale_LUT(scaling_lut->cr,
719
584k
                            clamp(((average_luma * cr_luma_mult +
720
584k
                                    cr_mult * cr[i * chroma_stride + j]) >>
721
584k
                                   6) +
722
584k
                                      cr_offset,
723
584k
                                  0, (256 << (bit_depth - 8)) - 1),
724
584k
                            8) *
725
584k
                      cr_grain[i * chroma_grain_stride + j] +
726
584k
                  rounding_offset) >>
727
584k
                 params->scaling_shift),
728
584k
            min_chroma, max_chroma);
729
584k
      }
730
7.38M
    }
731
542k
  }
732
733
42.5k
  if (apply_y) {
734
545k
    for (int i = 0; i < (half_luma_height << 1); i++) {
735
9.18M
      for (int j = 0; j < (half_luma_width << 1); j++) {
736
8.67M
        luma[i * luma_stride + j] = clamp(
737
8.67M
            luma[i * luma_stride + j] +
738
8.67M
                ((scale_LUT(scaling_lut->y, luma[i * luma_stride + j], 8) *
739
8.67M
                      luma_grain[i * luma_grain_stride + j] +
740
8.67M
                  rounding_offset) >>
741
8.67M
                 params->scaling_shift),
742
8.67M
            min_luma, max_luma);
743
8.67M
      }
744
516k
    }
745
29.1k
  }
746
42.5k
}
747
748
static void add_noise_to_block_hbd(
749
    const aom_film_grain_t *params, const aom_grain_scaling_lut_t *scaling_lut,
750
    uint16_t *luma, uint16_t *cb, uint16_t *cr, int luma_stride,
751
    int chroma_stride, int *luma_grain, int *cb_grain, int *cr_grain,
752
    int luma_grain_stride, int chroma_grain_stride, int half_luma_height,
753
    int half_luma_width, int bit_depth, int chroma_subsamp_y,
754
44.4k
    int chroma_subsamp_x, int mc_identity) {
755
44.4k
  int cb_mult = params->cb_mult - 128;            // fixed scale
756
44.4k
  int cb_luma_mult = params->cb_luma_mult - 128;  // fixed scale
757
  // offset value depends on the bit depth
758
44.4k
  int cb_offset = (params->cb_offset << (bit_depth - 8)) - (1 << bit_depth);
759
760
44.4k
  int cr_mult = params->cr_mult - 128;            // fixed scale
761
44.4k
  int cr_luma_mult = params->cr_luma_mult - 128;  // fixed scale
762
  // offset value depends on the bit depth
763
44.4k
  int cr_offset = (params->cr_offset << (bit_depth - 8)) - (1 << bit_depth);
764
765
44.4k
  int rounding_offset = (1 << (params->scaling_shift - 1));
766
767
44.4k
  int apply_y = params->num_y_points > 0 ? 1 : 0;
768
44.4k
  int apply_cb =
769
44.4k
      (params->num_cb_points > 0 || params->chroma_scaling_from_luma) > 0 ? 1
770
44.4k
                                                                          : 0;
771
44.4k
  int apply_cr =
772
44.4k
      (params->num_cr_points > 0 || params->chroma_scaling_from_luma) > 0 ? 1
773
44.4k
                                                                          : 0;
774
775
44.4k
  if (params->chroma_scaling_from_luma) {
776
19.0k
    cb_mult = 0;        // fixed scale
777
19.0k
    cb_luma_mult = 64;  // fixed scale
778
19.0k
    cb_offset = 0;
779
780
19.0k
    cr_mult = 0;        // fixed scale
781
19.0k
    cr_luma_mult = 64;  // fixed scale
782
19.0k
    cr_offset = 0;
783
19.0k
  }
784
785
44.4k
  int min_luma, max_luma, min_chroma, max_chroma;
786
787
44.4k
  if (params->clip_to_restricted_range) {
788
11.0k
    min_luma = min_luma_legal_range << (bit_depth - 8);
789
11.0k
    max_luma = max_luma_legal_range << (bit_depth - 8);
790
791
11.0k
    if (mc_identity) {
792
3.44k
      min_chroma = min_luma_legal_range << (bit_depth - 8);
793
3.44k
      max_chroma = max_luma_legal_range << (bit_depth - 8);
794
7.60k
    } else {
795
7.60k
      min_chroma = min_chroma_legal_range << (bit_depth - 8);
796
7.60k
      max_chroma = max_chroma_legal_range << (bit_depth - 8);
797
7.60k
    }
798
33.4k
  } else {
799
33.4k
    min_luma = min_chroma = 0;
800
33.4k
    max_luma = max_chroma = (256 << (bit_depth - 8)) - 1;
801
33.4k
  }
802
803
673k
  for (int i = 0; i < (half_luma_height << (1 - chroma_subsamp_y)); i++) {
804
9.75M
    for (int j = 0; j < (half_luma_width << (1 - chroma_subsamp_x)); j++) {
805
9.12M
      int average_luma = 0;
806
9.12M
      if (chroma_subsamp_x) {
807
1.23M
        average_luma = (luma[(i << chroma_subsamp_y) * luma_stride +
808
1.23M
                             (j << chroma_subsamp_x)] +
809
1.23M
                        luma[(i << chroma_subsamp_y) * luma_stride +
810
1.23M
                             (j << chroma_subsamp_x) + 1] +
811
1.23M
                        1) >>
812
1.23M
                       1;
813
7.89M
      } else {
814
7.89M
        average_luma = luma[(i << chroma_subsamp_y) * luma_stride + j];
815
7.89M
      }
816
817
9.12M
      if (apply_cb) {
818
5.84M
        cb[i * chroma_stride + j] = clamp(
819
5.84M
            cb[i * chroma_stride + j] +
820
5.84M
                ((scale_LUT(scaling_lut->cb,
821
5.84M
                            clamp(((average_luma * cb_luma_mult +
822
5.84M
                                    cb_mult * cb[i * chroma_stride + j]) >>
823
5.84M
                                   6) +
824
5.84M
                                      cb_offset,
825
5.84M
                                  0, (256 << (bit_depth - 8)) - 1),
826
5.84M
                            bit_depth) *
827
5.84M
                      cb_grain[i * chroma_grain_stride + j] +
828
5.84M
                  rounding_offset) >>
829
5.84M
                 params->scaling_shift),
830
5.84M
            min_chroma, max_chroma);
831
5.84M
      }
832
9.12M
      if (apply_cr) {
833
6.16M
        cr[i * chroma_stride + j] = clamp(
834
6.16M
            cr[i * chroma_stride + j] +
835
6.16M
                ((scale_LUT(scaling_lut->cr,
836
6.16M
                            clamp(((average_luma * cr_luma_mult +
837
6.16M
                                    cr_mult * cr[i * chroma_stride + j]) >>
838
6.16M
                                   6) +
839
6.16M
                                      cr_offset,
840
6.16M
                                  0, (256 << (bit_depth - 8)) - 1),
841
6.16M
                            bit_depth) *
842
6.16M
                      cr_grain[i * chroma_grain_stride + j] +
843
6.16M
                  rounding_offset) >>
844
6.16M
                 params->scaling_shift),
845
6.16M
            min_chroma, max_chroma);
846
6.16M
      }
847
9.12M
    }
848
629k
  }
849
850
44.4k
  if (apply_y) {
851
724k
    for (int i = 0; i < (half_luma_height << 1); i++) {
852
11.7M
      for (int j = 0; j < (half_luma_width << 1); j++) {
853
11.0M
        luma[i * luma_stride + j] =
854
11.0M
            clamp(luma[i * luma_stride + j] +
855
11.0M
                      ((scale_LUT(scaling_lut->y, luma[i * luma_stride + j],
856
11.0M
                                  bit_depth) *
857
11.0M
                            luma_grain[i * luma_grain_stride + j] +
858
11.0M
                        rounding_offset) >>
859
11.0M
                       params->scaling_shift),
860
11.0M
                  min_luma, max_luma);
861
11.0M
      }
862
682k
    }
863
41.8k
  }
864
44.4k
}
865
866
// Copies a width x height rectangle of samples from src to dst, one row at a
// time. Strides are applied directly to the byte pointers; each sample
// occupies two bytes when use_high_bit_depth is non-zero, one byte otherwise.
static void copy_rect(uint8_t *src, int src_stride, uint8_t *dst,
                      int dst_stride, int width, int height,
                      int use_high_bit_depth) {
  const int bytes_per_sample = use_high_bit_depth ? 2 : 1;
  const size_t row_bytes = width * sizeof(uint8_t) * bytes_per_sample;

  for (; height != 0; --height) {
    memcpy(dst, src, row_bytes);
    src += src_stride;
    dst += dst_stride;
  }
}
878
879
// Copies a width x height rectangle of int values from src to dst, row by
// row. Both strides are expressed in elements.
static void copy_area(int *src, int src_stride, int *dst, int dst_stride,
                      int width, int height) {
  const size_t row_bytes = width * sizeof(*src);

  for (; height != 0; --height) {
    memcpy(dst, src, row_bytes);
    src += src_stride;
    dst += dst_stride;
  }
}
889
890
// Pads a picture plane out to even dimensions by replicating edge samples.
// When width is odd, the last sample of each of the first `height` rows is
// duplicated into the column to its right. When height is odd, the last row
// (including any column added above) is duplicated into the row below.
// dst_stride is in bytes; for high bit depth the buffer is accessed as
// 16-bit samples with half that stride. The buffer must have room for the
// extra column and row.
static void extend_even(uint8_t *dst, int dst_stride, int width, int height,
                        int use_high_bit_depth) {
  const int odd_width = width & 1;
  const int odd_height = height & 1;
  if (!odd_width && !odd_height) return;

  // Width once the extra column (if any) is accounted for.
  const int even_width = (width + 1) & (~1);

  if (use_high_bit_depth) {
    uint16_t *dst16 = (uint16_t *)dst;
    const int dst16_stride = dst_stride / 2;

    if (odd_width) {
      for (int row = 0; row < height; ++row) {
        const int last = row * dst16_stride + width - 1;
        dst16[last + 1] = dst16[last];
      }
    }
    if (odd_height) {
      memcpy(&dst16[height * dst16_stride], &dst16[(height - 1) * dst16_stride],
             sizeof(*dst16) * even_width);
    }
    return;
  }

  if (odd_width) {
    for (int row = 0; row < height; ++row) {
      const int last = row * dst_stride + width - 1;
      dst[last + 1] = dst[last];
    }
  }
  if (odd_height) {
    memcpy(&dst[height * dst_stride], &dst[(height - 1) * dst_stride],
           sizeof(*dst) * even_width);
  }
}
917
918
// Blends the columns where two horizontally adjacent grain blocks overlap.
// For each of `height` rows, the left and right block values are combined
// with fixed weights (sum scaled by 1/32 with rounding), clamped to
// [grain_min, grain_max] and stored in dst_block. Overlaps of one or two
// columns are handled; any other width leaves dst_block untouched. Values
// are read and written in the original order so that dst_block may be the
// same buffer as left_block.
static void ver_boundary_overlap(int *left_block, int left_stride,
                                 int *right_block, int right_stride,
                                 int *dst_block, int dst_stride, int width,
                                 int height, int grain_min, int grain_max) {
  if (width == 1) {
    for (; height != 0; --height) {
      const int blended = (*left_block * 23 + *right_block * 22 + 16) >> 5;
      *dst_block = clamp(blended, grain_min, grain_max);

      left_block += left_stride;
      right_block += right_stride;
      dst_block += dst_stride;
    }
  } else if (width == 2) {
    for (; height != 0; --height) {
      // Column nearer the left block leans on the left values ...
      const int near_left =
          (27 * left_block[0] + 17 * right_block[0] + 16) >> 5;
      dst_block[0] = clamp(near_left, grain_min, grain_max);

      // ... and the column nearer the right block on the right values.
      const int near_right =
          (17 * left_block[1] + 27 * right_block[1] + 16) >> 5;
      dst_block[1] = clamp(near_right, grain_min, grain_max);

      left_block += left_stride;
      right_block += right_stride;
      dst_block += dst_stride;
    }
  }
}
946
947
// Blends the rows where two vertically adjacent grain blocks overlap.
// For each of `width` columns, the top and bottom block values are combined
// with fixed weights (sum scaled by 1/32 with rounding), clamped to
// [grain_min, grain_max] and stored in dst_block. Overlaps of one or two
// rows are handled; any other height leaves dst_block untouched. Values are
// read and written in the original order so that dst_block may be the same
// buffer as top_block.
static void hor_boundary_overlap(int *top_block, int top_stride,
                                 int *bottom_block, int bottom_stride,
                                 int *dst_block, int dst_stride, int width,
                                 int height, int grain_min, int grain_max) {
  if (height == 1) {
    for (; width != 0; --width) {
      const int blended = (*top_block * 23 + *bottom_block * 22 + 16) >> 5;
      *dst_block = clamp(blended, grain_min, grain_max);

      ++top_block;
      ++bottom_block;
      ++dst_block;
    }
  } else if (height == 2) {
    for (; width != 0; --width) {
      // Row nearer the top block leans on the top values ...
      const int near_top = (27 * top_block[0] + 17 * bottom_block[0] + 16) >> 5;
      dst_block[0] = clamp(near_top, grain_min, grain_max);

      // ... and the row nearer the bottom block on the bottom values.
      const int near_bottom = (17 * top_block[top_stride] +
                               27 * bottom_block[bottom_stride] + 16) >>
                              5;
      dst_block[dst_stride] = clamp(near_bottom, grain_min, grain_max);

      ++top_block;
      ++bottom_block;
      ++dst_block;
    }
  }
}
977
978
/*!\brief Add film grain
979
 *
980
 * Add film grain to an image
981
 *
982
 * Returns 0 for success, -1 for failure
983
 *
984
 * \param[in]    grain_params     Grain parameters
985
 * \param[in]    luma             luma plane
986
 * \param[in]    cb               cb plane
987
 * \param[in]    cr               cr plane
988
 * \param[in]    height           luma plane height
989
 * \param[in]    width            luma plane width
990
 * \param[in]    luma_stride      luma plane stride
991
 * \param[in]    chroma_stride    chroma plane stride
992
 */
993
static int add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
994
                              uint8_t *cb, uint8_t *cr, int height, int width,
995
                              int luma_stride, int chroma_stride,
996
                              int use_high_bit_depth, int chroma_subsamp_y,
997
8.82k
                              int chroma_subsamp_x, int mc_identity) {
998
8.82k
  int **pred_pos_luma;
999
8.82k
  int **pred_pos_chroma;
1000
8.82k
  int *luma_grain_block;
1001
8.82k
  int *cb_grain_block;
1002
8.82k
  int *cr_grain_block;
1003
1004
8.82k
  int *y_line_buf;
1005
8.82k
  int *cb_line_buf;
1006
8.82k
  int *cr_line_buf;
1007
1008
8.82k
  int *y_col_buf;
1009
8.82k
  int *cb_col_buf;
1010
8.82k
  int *cr_col_buf;
1011
1012
8.82k
  aom_grain_scaling_lut_t scaling_lut;
1013
8.82k
  memset(&scaling_lut, 0, sizeof(scaling_lut));
1014
1015
8.82k
  aom_grain_rng_t rng;
1016
8.82k
  rng.random_register = params->random_seed;
1017
1018
8.82k
  int left_pad = 3;
1019
8.82k
  int right_pad = 3;  // padding to offset for AR coefficients
1020
8.82k
  int top_pad = 3;
1021
8.82k
  int bottom_pad = 0;
1022
1023
8.82k
  int ar_padding = 3;  // maximum lag used for stabilization of AR coefficients
1024
1025
8.82k
  const int chroma_subblock_size_y = luma_subblock_size_y >> chroma_subsamp_y;
1026
8.82k
  const int chroma_subblock_size_x = luma_subblock_size_x >> chroma_subsamp_x;
1027
1028
  // Initial padding is only needed for generation of
1029
  // film grain templates (to stabilize the AR process)
1030
  // Only a 64x64 luma and 32x32 chroma part of a template
1031
  // is used later for adding grain, padding can be discarded
1032
1033
8.82k
  int luma_block_size_y =
1034
8.82k
      top_pad + 2 * ar_padding + luma_subblock_size_y * 2 + bottom_pad;
1035
8.82k
  int luma_block_size_x = left_pad + 2 * ar_padding + luma_subblock_size_x * 2 +
1036
8.82k
                          2 * ar_padding + right_pad;
1037
1038
8.82k
  int chroma_block_size_y = top_pad + (2 >> chroma_subsamp_y) * ar_padding +
1039
8.82k
                            chroma_subblock_size_y * 2 + bottom_pad;
1040
8.82k
  int chroma_block_size_x = left_pad + (2 >> chroma_subsamp_x) * ar_padding +
1041
8.82k
                            chroma_subblock_size_x * 2 +
1042
8.82k
                            (2 >> chroma_subsamp_x) * ar_padding + right_pad;
1043
1044
8.82k
  int luma_grain_stride = luma_block_size_x;
1045
8.82k
  int chroma_grain_stride = chroma_block_size_x;
1046
1047
8.82k
  int overlap = params->overlap_flag;
1048
8.82k
  int bit_depth = params->bit_depth;
1049
1050
8.82k
  int grain_min = -(1 << (bit_depth - 1));
1051
8.82k
  int grain_max = (1 << (bit_depth - 1)) - 1;
1052
1053
8.82k
  if (!init_arrays(params, luma_stride, chroma_stride, &pred_pos_luma,
1054
8.82k
                   &pred_pos_chroma, &luma_grain_block, &cb_grain_block,
1055
8.82k
                   &cr_grain_block, &y_line_buf, &cb_line_buf, &cr_line_buf,
1056
8.82k
                   &y_col_buf, &cb_col_buf, &cr_col_buf,
1057
8.82k
                   luma_block_size_y * luma_block_size_x,
1058
8.82k
                   chroma_block_size_y * chroma_block_size_x, chroma_subsamp_y,
1059
8.82k
                   chroma_subsamp_x))
1060
0
    return -1;
1061
1062
8.82k
  generate_luma_grain_block(params, &rng, pred_pos_luma, luma_grain_block,
1063
8.82k
                            luma_block_size_y, luma_block_size_x,
1064
8.82k
                            luma_grain_stride, left_pad, top_pad, right_pad,
1065
8.82k
                            bottom_pad);
1066
1067
8.82k
  if (!generate_chroma_grain_blocks(
1068
8.82k
          params, &rng, pred_pos_chroma, luma_grain_block, cb_grain_block,
1069
8.82k
          cr_grain_block, luma_grain_stride, chroma_block_size_y,
1070
8.82k
          chroma_block_size_x, chroma_grain_stride, left_pad, top_pad,
1071
8.82k
          right_pad, bottom_pad, chroma_subsamp_y, chroma_subsamp_x))
1072
0
    return -1;
1073
1074
8.82k
  init_scaling_function(params->scaling_points_y, params->num_y_points,
1075
8.82k
                        scaling_lut.y);
1076
1077
8.82k
  if (params->chroma_scaling_from_luma) {
1078
3.67k
    static_assert(sizeof(scaling_lut.cb) == sizeof(scaling_lut.y), "");
1079
3.67k
    static_assert(sizeof(scaling_lut.cr) == sizeof(scaling_lut.y), "");
1080
3.67k
    memcpy(scaling_lut.cb, scaling_lut.y, sizeof(scaling_lut.y));
1081
3.67k
    memcpy(scaling_lut.cr, scaling_lut.y, sizeof(scaling_lut.y));
1082
5.15k
  } else {
1083
5.15k
    init_scaling_function(params->scaling_points_cb, params->num_cb_points,
1084
5.15k
                          scaling_lut.cb);
1085
5.15k
    init_scaling_function(params->scaling_points_cr, params->num_cr_points,
1086
5.15k
                          scaling_lut.cr);
1087
5.15k
  }
1088
23.5k
  for (int y = 0; y < height / 2; y += (luma_subblock_size_y >> 1)) {
1089
14.6k
    init_random_generator(&rng, y * 2, params->random_seed);
1090
1091
63.8k
    for (int x = 0; x < width / 2; x += (luma_subblock_size_x >> 1)) {
1092
49.2k
      int offset_y = get_random_number(&rng, 8);
1093
49.2k
      int offset_x = (offset_y >> 4) & 15;
1094
49.2k
      offset_y &= 15;
1095
1096
49.2k
      int luma_offset_y = left_pad + 2 * ar_padding + (offset_y << 1);
1097
49.2k
      int luma_offset_x = top_pad + 2 * ar_padding + (offset_x << 1);
1098
1099
49.2k
      int chroma_offset_y = top_pad + (2 >> chroma_subsamp_y) * ar_padding +
1100
49.2k
                            offset_y * (2 >> chroma_subsamp_y);
1101
49.2k
      int chroma_offset_x = left_pad + (2 >> chroma_subsamp_x) * ar_padding +
1102
49.2k
                            offset_x * (2 >> chroma_subsamp_x);
1103
1104
49.2k
      if (overlap && x) {
1105
19.2k
        ver_boundary_overlap(
1106
19.2k
            y_col_buf, 2,
1107
19.2k
            luma_grain_block + luma_offset_y * luma_grain_stride +
1108
19.2k
                luma_offset_x,
1109
19.2k
            luma_grain_stride, y_col_buf, 2, 2,
1110
19.2k
            AOMMIN(luma_subblock_size_y + 2, height - (y << 1)), grain_min,
1111
19.2k
            grain_max);
1112
1113
19.2k
        ver_boundary_overlap(
1114
19.2k
            cb_col_buf, 2 >> chroma_subsamp_x,
1115
19.2k
            cb_grain_block + chroma_offset_y * chroma_grain_stride +
1116
19.2k
                chroma_offset_x,
1117
19.2k
            chroma_grain_stride, cb_col_buf, 2 >> chroma_subsamp_x,
1118
19.2k
            2 >> chroma_subsamp_x,
1119
19.2k
            AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y),
1120
19.2k
                   (height - (y << 1)) >> chroma_subsamp_y),
1121
19.2k
            grain_min, grain_max);
1122
1123
19.2k
        ver_boundary_overlap(
1124
19.2k
            cr_col_buf, 2 >> chroma_subsamp_x,
1125
19.2k
            cr_grain_block + chroma_offset_y * chroma_grain_stride +
1126
19.2k
                chroma_offset_x,
1127
19.2k
            chroma_grain_stride, cr_col_buf, 2 >> chroma_subsamp_x,
1128
19.2k
            2 >> chroma_subsamp_x,
1129
19.2k
            AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y),
1130
19.2k
                   (height - (y << 1)) >> chroma_subsamp_y),
1131
19.2k
            grain_min, grain_max);
1132
1133
19.2k
        int i = y ? 1 : 0;
1134
1135
19.2k
        if (use_high_bit_depth) {
1136
11.5k
          add_noise_to_block_hbd(
1137
11.5k
              params, &scaling_lut,
1138
11.5k
              (uint16_t *)luma + ((y + i) << 1) * luma_stride + (x << 1),
1139
11.5k
              (uint16_t *)cb +
1140
11.5k
                  ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1141
11.5k
                  (x << (1 - chroma_subsamp_x)),
1142
11.5k
              (uint16_t *)cr +
1143
11.5k
                  ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1144
11.5k
                  (x << (1 - chroma_subsamp_x)),
1145
11.5k
              luma_stride, chroma_stride, y_col_buf + i * 4,
1146
11.5k
              cb_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x),
1147
11.5k
              cr_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x),
1148
11.5k
              2, (2 - chroma_subsamp_x),
1149
11.5k
              AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i, 1,
1150
11.5k
              bit_depth, chroma_subsamp_y, chroma_subsamp_x, mc_identity);
1151
11.5k
        } else {
1152
7.68k
          add_noise_to_block(
1153
7.68k
              params, &scaling_lut,
1154
7.68k
              luma + ((y + i) << 1) * luma_stride + (x << 1),
1155
7.68k
              cb + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1156
7.68k
                  (x << (1 - chroma_subsamp_x)),
1157
7.68k
              cr + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1158
7.68k
                  (x << (1 - chroma_subsamp_x)),
1159
7.68k
              luma_stride, chroma_stride, y_col_buf + i * 4,
1160
7.68k
              cb_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x),
1161
7.68k
              cr_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x),
1162
7.68k
              2, (2 - chroma_subsamp_x),
1163
7.68k
              AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i, 1,
1164
7.68k
              bit_depth, chroma_subsamp_y, chroma_subsamp_x, mc_identity);
1165
7.68k
        }
1166
19.2k
      }
1167
1168
49.2k
      if (overlap && y) {
1169
18.5k
        if (x) {
1170
13.5k
          hor_boundary_overlap(y_line_buf + (x << 1), luma_stride, y_col_buf, 2,
1171
13.5k
                               y_line_buf + (x << 1), luma_stride, 2, 2,
1172
13.5k
                               grain_min, grain_max);
1173
1174
13.5k
          hor_boundary_overlap(cb_line_buf + x * (2 >> chroma_subsamp_x),
1175
13.5k
                               chroma_stride, cb_col_buf, 2 >> chroma_subsamp_x,
1176
13.5k
                               cb_line_buf + x * (2 >> chroma_subsamp_x),
1177
13.5k
                               chroma_stride, 2 >> chroma_subsamp_x,
1178
13.5k
                               2 >> chroma_subsamp_y, grain_min, grain_max);
1179
1180
13.5k
          hor_boundary_overlap(cr_line_buf + x * (2 >> chroma_subsamp_x),
1181
13.5k
                               chroma_stride, cr_col_buf, 2 >> chroma_subsamp_x,
1182
13.5k
                               cr_line_buf + x * (2 >> chroma_subsamp_x),
1183
13.5k
                               chroma_stride, 2 >> chroma_subsamp_x,
1184
13.5k
                               2 >> chroma_subsamp_y, grain_min, grain_max);
1185
13.5k
        }
1186
1187
18.5k
        hor_boundary_overlap(
1188
18.5k
            y_line_buf + ((x ? x + 1 : 0) << 1), luma_stride,
1189
18.5k
            luma_grain_block + luma_offset_y * luma_grain_stride +
1190
18.5k
                luma_offset_x + (x ? 2 : 0),
1191
18.5k
            luma_grain_stride, y_line_buf + ((x ? x + 1 : 0) << 1), luma_stride,
1192
18.5k
            AOMMIN(luma_subblock_size_x - ((x ? 1 : 0) << 1),
1193
18.5k
                   width - ((x ? x + 1 : 0) << 1)),
1194
18.5k
            2, grain_min, grain_max);
1195
1196
18.5k
        hor_boundary_overlap(
1197
18.5k
            cb_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
1198
18.5k
            chroma_stride,
1199
18.5k
            cb_grain_block + chroma_offset_y * chroma_grain_stride +
1200
18.5k
                chroma_offset_x + ((x ? 1 : 0) << (1 - chroma_subsamp_x)),
1201
18.5k
            chroma_grain_stride,
1202
18.5k
            cb_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
1203
18.5k
            chroma_stride,
1204
18.5k
            AOMMIN(chroma_subblock_size_x -
1205
18.5k
                       ((x ? 1 : 0) << (1 - chroma_subsamp_x)),
1206
18.5k
                   (width - ((x ? x + 1 : 0) << 1)) >> chroma_subsamp_x),
1207
18.5k
            2 >> chroma_subsamp_y, grain_min, grain_max);
1208
1209
18.5k
        hor_boundary_overlap(
1210
18.5k
            cr_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
1211
18.5k
            chroma_stride,
1212
18.5k
            cr_grain_block + chroma_offset_y * chroma_grain_stride +
1213
18.5k
                chroma_offset_x + ((x ? 1 : 0) << (1 - chroma_subsamp_x)),
1214
18.5k
            chroma_grain_stride,
1215
18.5k
            cr_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
1216
18.5k
            chroma_stride,
1217
18.5k
            AOMMIN(chroma_subblock_size_x -
1218
18.5k
                       ((x ? 1 : 0) << (1 - chroma_subsamp_x)),
1219
18.5k
                   (width - ((x ? x + 1 : 0) << 1)) >> chroma_subsamp_x),
1220
18.5k
            2 >> chroma_subsamp_y, grain_min, grain_max);
1221
1222
18.5k
        if (use_high_bit_depth) {
1223
10.3k
          add_noise_to_block_hbd(
1224
10.3k
              params, &scaling_lut,
1225
10.3k
              (uint16_t *)luma + (y << 1) * luma_stride + (x << 1),
1226
10.3k
              (uint16_t *)cb + (y << (1 - chroma_subsamp_y)) * chroma_stride +
1227
10.3k
                  (x << ((1 - chroma_subsamp_x))),
1228
10.3k
              (uint16_t *)cr + (y << (1 - chroma_subsamp_y)) * chroma_stride +
1229
10.3k
                  (x << ((1 - chroma_subsamp_x))),
1230
10.3k
              luma_stride, chroma_stride, y_line_buf + (x << 1),
1231
10.3k
              cb_line_buf + (x << (1 - chroma_subsamp_x)),
1232
10.3k
              cr_line_buf + (x << (1 - chroma_subsamp_x)), luma_stride,
1233
10.3k
              chroma_stride, 1,
1234
10.3k
              AOMMIN(luma_subblock_size_x >> 1, width / 2 - x), bit_depth,
1235
10.3k
              chroma_subsamp_y, chroma_subsamp_x, mc_identity);
1236
10.3k
        } else {
1237
8.17k
          add_noise_to_block(
1238
8.17k
              params, &scaling_lut, luma + (y << 1) * luma_stride + (x << 1),
1239
8.17k
              cb + (y << (1 - chroma_subsamp_y)) * chroma_stride +
1240
8.17k
                  (x << ((1 - chroma_subsamp_x))),
1241
8.17k
              cr + (y << (1 - chroma_subsamp_y)) * chroma_stride +
1242
8.17k
                  (x << ((1 - chroma_subsamp_x))),
1243
8.17k
              luma_stride, chroma_stride, y_line_buf + (x << 1),
1244
8.17k
              cb_line_buf + (x << (1 - chroma_subsamp_x)),
1245
8.17k
              cr_line_buf + (x << (1 - chroma_subsamp_x)), luma_stride,
1246
8.17k
              chroma_stride, 1,
1247
8.17k
              AOMMIN(luma_subblock_size_x >> 1, width / 2 - x), bit_depth,
1248
8.17k
              chroma_subsamp_y, chroma_subsamp_x, mc_identity);
1249
8.17k
        }
1250
18.5k
      }
1251
1252
49.2k
      int i = overlap && y ? 1 : 0;
1253
49.2k
      int j = overlap && x ? 1 : 0;
1254
1255
49.2k
      if (use_high_bit_depth) {
1256
22.5k
        add_noise_to_block_hbd(
1257
22.5k
            params, &scaling_lut,
1258
22.5k
            (uint16_t *)luma + ((y + i) << 1) * luma_stride + ((x + j) << 1),
1259
22.5k
            (uint16_t *)cb +
1260
22.5k
                ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1261
22.5k
                ((x + j) << (1 - chroma_subsamp_x)),
1262
22.5k
            (uint16_t *)cr +
1263
22.5k
                ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1264
22.5k
                ((x + j) << (1 - chroma_subsamp_x)),
1265
22.5k
            luma_stride, chroma_stride,
1266
22.5k
            luma_grain_block + (luma_offset_y + (i << 1)) * luma_grain_stride +
1267
22.5k
                luma_offset_x + (j << 1),
1268
22.5k
            cb_grain_block +
1269
22.5k
                (chroma_offset_y + (i << (1 - chroma_subsamp_y))) *
1270
22.5k
                    chroma_grain_stride +
1271
22.5k
                chroma_offset_x + (j << (1 - chroma_subsamp_x)),
1272
22.5k
            cr_grain_block +
1273
22.5k
                (chroma_offset_y + (i << (1 - chroma_subsamp_y))) *
1274
22.5k
                    chroma_grain_stride +
1275
22.5k
                chroma_offset_x + (j << (1 - chroma_subsamp_x)),
1276
22.5k
            luma_grain_stride, chroma_grain_stride,
1277
22.5k
            AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i,
1278
22.5k
            AOMMIN(luma_subblock_size_x >> 1, width / 2 - x) - j, bit_depth,
1279
22.5k
            chroma_subsamp_y, chroma_subsamp_x, mc_identity);
1280
26.7k
      } else {
1281
26.7k
        add_noise_to_block(
1282
26.7k
            params, &scaling_lut,
1283
26.7k
            luma + ((y + i) << 1) * luma_stride + ((x + j) << 1),
1284
26.7k
            cb + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1285
26.7k
                ((x + j) << (1 - chroma_subsamp_x)),
1286
26.7k
            cr + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1287
26.7k
                ((x + j) << (1 - chroma_subsamp_x)),
1288
26.7k
            luma_stride, chroma_stride,
1289
26.7k
            luma_grain_block + (luma_offset_y + (i << 1)) * luma_grain_stride +
1290
26.7k
                luma_offset_x + (j << 1),
1291
26.7k
            cb_grain_block +
1292
26.7k
                (chroma_offset_y + (i << (1 - chroma_subsamp_y))) *
1293
26.7k
                    chroma_grain_stride +
1294
26.7k
                chroma_offset_x + (j << (1 - chroma_subsamp_x)),
1295
26.7k
            cr_grain_block +
1296
26.7k
                (chroma_offset_y + (i << (1 - chroma_subsamp_y))) *
1297
26.7k
                    chroma_grain_stride +
1298
26.7k
                chroma_offset_x + (j << (1 - chroma_subsamp_x)),
1299
26.7k
            luma_grain_stride, chroma_grain_stride,
1300
26.7k
            AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i,
1301
26.7k
            AOMMIN(luma_subblock_size_x >> 1, width / 2 - x) - j, bit_depth,
1302
26.7k
            chroma_subsamp_y, chroma_subsamp_x, mc_identity);
1303
26.7k
      }
1304
1305
49.2k
      if (overlap) {
1306
28.6k
        if (x) {
1307
          // Copy overlapped column buffer to line buffer
1308
19.2k
          copy_area(y_col_buf + (luma_subblock_size_y << 1), 2,
1309
19.2k
                    y_line_buf + (x << 1), luma_stride, 2, 2);
1310
1311
19.2k
          copy_area(
1312
19.2k
              cb_col_buf + (chroma_subblock_size_y << (1 - chroma_subsamp_x)),
1313
19.2k
              2 >> chroma_subsamp_x,
1314
19.2k
              cb_line_buf + (x << (1 - chroma_subsamp_x)), chroma_stride,
1315
19.2k
              2 >> chroma_subsamp_x, 2 >> chroma_subsamp_y);
1316
1317
19.2k
          copy_area(
1318
19.2k
              cr_col_buf + (chroma_subblock_size_y << (1 - chroma_subsamp_x)),
1319
19.2k
              2 >> chroma_subsamp_x,
1320
19.2k
              cr_line_buf + (x << (1 - chroma_subsamp_x)), chroma_stride,
1321
19.2k
              2 >> chroma_subsamp_x, 2 >> chroma_subsamp_y);
1322
19.2k
        }
1323
1324
        // Copy grain to the line buffer for overlap with a bottom block
1325
28.6k
        copy_area(
1326
28.6k
            luma_grain_block +
1327
28.6k
                (luma_offset_y + luma_subblock_size_y) * luma_grain_stride +
1328
28.6k
                luma_offset_x + ((x ? 2 : 0)),
1329
28.6k
            luma_grain_stride, y_line_buf + ((x ? x + 1 : 0) << 1), luma_stride,
1330
28.6k
            AOMMIN(luma_subblock_size_x, width - (x << 1)) - (x ? 2 : 0), 2);
1331
1332
28.6k
        copy_area(cb_grain_block +
1333
28.6k
                      (chroma_offset_y + chroma_subblock_size_y) *
1334
28.6k
                          chroma_grain_stride +
1335
28.6k
                      chroma_offset_x + (x ? 2 >> chroma_subsamp_x : 0),
1336
28.6k
                  chroma_grain_stride,
1337
28.6k
                  cb_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
1338
28.6k
                  chroma_stride,
1339
28.6k
                  AOMMIN(chroma_subblock_size_x,
1340
28.6k
                         ((width - (x << 1)) >> chroma_subsamp_x)) -
1341
28.6k
                      (x ? 2 >> chroma_subsamp_x : 0),
1342
28.6k
                  2 >> chroma_subsamp_y);
1343
1344
28.6k
        copy_area(cr_grain_block +
1345
28.6k
                      (chroma_offset_y + chroma_subblock_size_y) *
1346
28.6k
                          chroma_grain_stride +
1347
28.6k
                      chroma_offset_x + (x ? 2 >> chroma_subsamp_x : 0),
1348
28.6k
                  chroma_grain_stride,
1349
28.6k
                  cr_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
1350
28.6k
                  chroma_stride,
1351
28.6k
                  AOMMIN(chroma_subblock_size_x,
1352
28.6k
                         ((width - (x << 1)) >> chroma_subsamp_x)) -
1353
28.6k
                      (x ? 2 >> chroma_subsamp_x : 0),
1354
28.6k
                  2 >> chroma_subsamp_y);
1355
1356
        // Copy grain to the column buffer for overlap with the next block to
1357
        // the right
1358
1359
28.6k
        copy_area(luma_grain_block + luma_offset_y * luma_grain_stride +
1360
28.6k
                      luma_offset_x + luma_subblock_size_x,
1361
28.6k
                  luma_grain_stride, y_col_buf, 2, 2,
1362
28.6k
                  AOMMIN(luma_subblock_size_y + 2, height - (y << 1)));
1363
1364
28.6k
        copy_area(cb_grain_block + chroma_offset_y * chroma_grain_stride +
1365
28.6k
                      chroma_offset_x + chroma_subblock_size_x,
1366
28.6k
                  chroma_grain_stride, cb_col_buf, 2 >> chroma_subsamp_x,
1367
28.6k
                  2 >> chroma_subsamp_x,
1368
28.6k
                  AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y),
1369
28.6k
                         (height - (y << 1)) >> chroma_subsamp_y));
1370
1371
28.6k
        copy_area(cr_grain_block + chroma_offset_y * chroma_grain_stride +
1372
28.6k
                      chroma_offset_x + chroma_subblock_size_x,
1373
28.6k
                  chroma_grain_stride, cr_col_buf, 2 >> chroma_subsamp_x,
1374
28.6k
                  2 >> chroma_subsamp_x,
1375
28.6k
                  AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y),
1376
28.6k
                         (height - (y << 1)) >> chroma_subsamp_y));
1377
28.6k
      }
1378
49.2k
    }
1379
14.6k
  }
1380
1381
8.82k
  dealloc_arrays(params, &pred_pos_luma, &pred_pos_chroma, &luma_grain_block,
1382
8.82k
                 &cb_grain_block, &cr_grain_block, &y_line_buf, &cb_line_buf,
1383
8.82k
                 &cr_line_buf, &y_col_buf, &cb_col_buf, &cr_col_buf);
1384
8.82k
  return 0;
1385
8.82k
}
1386
1387
/*!\brief Copies \a src into \a dst and runs add_film_grain_run() on \a dst.
 *
 * The pixel layout (bit depth container and chroma subsampling) is derived
 * from src->fmt. Image metadata is mirrored from \a src to \a dst, the planes
 * are copied, and the destination planes are then handed to
 * add_film_grain_run() together with the dimensions rounded up to even.
 *
 * \param[in]  params  Film grain parameters; params->bit_depth is asserted to
 *                     equal src->bit_depth.
 * \param[in]  src     Source image.
 * \param[out] dst     Destination image; its planes and strides are used as
 *                     they are (this function does not allocate them).
 *
 * \return -1 if src->fmt is not one of the handled formats (nothing is
 *         written to \a dst in that case), otherwise the value returned by
 *         add_film_grain_run().
 */
int av1_add_film_grain(const aom_film_grain_t *params, const aom_image_t *src,
                       aom_image_t *dst) {
  int chroma_subsamp_x;
  int chroma_subsamp_y;

  // The 8-bit and 16-bit variants of a layout share their chroma subsampling,
  // so they are grouped here; the bit depth flag is derived separately below.
  switch (src->fmt) {
    case AOM_IMG_FMT_I420:
    case AOM_IMG_FMT_I42016:
      chroma_subsamp_x = 1;
      chroma_subsamp_y = 1;
      break;
    case AOM_IMG_FMT_I444:
    case AOM_IMG_FMT_I44416:
      chroma_subsamp_x = 0;
      chroma_subsamp_y = 0;
      break;
    case AOM_IMG_FMT_I422:
    case AOM_IMG_FMT_I42216:
      chroma_subsamp_x = 1;
      chroma_subsamp_y = 0;
      break;
    default:  // unknown input format
      fprintf(stderr, "Film grain error: input format is not supported!");
      return -1;
  }

  // 1 for the 16-bit container formats, 0 for the 8-bit ones.
  const int use_high_bit_depth = src->fmt == AOM_IMG_FMT_I42016 ||
                                 src->fmt == AOM_IMG_FMT_I44416 ||
                                 src->fmt == AOM_IMG_FMT_I42216;
  const int mc_identity = (src->mc == AOM_CICP_MC_IDENTITY);

  assert(params->bit_depth == src->bit_depth);

  // Mirror the image description from src onto dst.
  dst->fmt = src->fmt;
  dst->bit_depth = src->bit_depth;
  dst->monochrome = src->monochrome;

  dst->cp = src->cp;
  dst->tc = src->tc;
  dst->mc = src->mc;
  dst->csp = src->csp;
  dst->range = src->range;

  dst->d_w = src->d_w;
  dst->d_h = src->d_h;
  dst->r_w = src->r_w;
  dst->r_h = src->r_h;

  dst->x_chroma_shift = src->x_chroma_shift;
  dst->y_chroma_shift = src->y_chroma_shift;

  dst->spatial_id = src->spatial_id;
  dst->temporal_id = src->temporal_id;

  // Round the display dimensions up to the next even value.
  const int width = src->d_w + (src->d_w & 1);
  const int height = src->d_h + (src->d_h & 1);

  // Luma is copied at its exact display size and then extended to even
  // dimensions. Note that dst is already assumed to be aligned to even.
  copy_rect(src->planes[AOM_PLANE_Y], src->stride[AOM_PLANE_Y],
            dst->planes[AOM_PLANE_Y], dst->stride[AOM_PLANE_Y], src->d_w,
            src->d_h, use_high_bit_depth);
  extend_even(dst->planes[AOM_PLANE_Y], dst->stride[AOM_PLANE_Y], src->d_w,
              src->d_h, use_high_bit_depth);

  if (!src->monochrome) {
    // Chroma is copied at the even-rounded size scaled by the subsampling.
    const int chroma_width = width >> chroma_subsamp_x;
    const int chroma_height = height >> chroma_subsamp_y;
    const int chroma_planes[2] = { AOM_PLANE_U, AOM_PLANE_V };

    for (int k = 0; k < 2; ++k) {
      const int plane = chroma_planes[k];
      copy_rect(src->planes[plane], src->stride[plane], dst->planes[plane],
                dst->stride[plane], chroma_width, chroma_height,
                use_high_bit_depth);
    }
  }

  uint8_t *const luma = dst->planes[AOM_PLANE_Y];
  uint8_t *const cb = dst->planes[AOM_PLANE_U];
  uint8_t *const cr = dst->planes[AOM_PLANE_V];

  // luma and chroma strides in samples: the stored strides are halved for the
  // 16-bit formats.
  const int luma_stride = dst->stride[AOM_PLANE_Y] >> use_high_bit_depth;
  const int chroma_stride = dst->stride[AOM_PLANE_U] >> use_high_bit_depth;

  return add_film_grain_run(params, luma, cb, cr, height, width, luma_stride,
                            chroma_stride, use_high_bit_depth, chroma_subsamp_y,
                            chroma_subsamp_x, mc_identity);
}