Coverage Report

Created: 2026-05-16 06:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/svt-av1/Source/Lib/Codec/grainSynthesis.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
10
 */
11
12
/*!\file
13
  * \brief Describes film grain parameters and film grain synthesis
14
  *
15
  */
16
17
#include <stdio.h>
18
#include <string.h>
19
#include <stdlib.h>
20
#include "grainSynthesis.h"
21
#include "common_dsp_rtcd.h"
22
#include "definitions.h"
23
#include "svt_log.h"
24
25
// Samples with Gaussian distribution in the range of [-2048, 2047] (12 bits)
26
// with zero mean and standard deviation of about 512.
27
// Values should be divided by 4 for the 10-bit range and by 16 for the 8-bit range.
28
static const int32_t gaussian_sequence[2048] = {
29
    56,    568,   -180,  172,   124,   -84,   172,   -64,   -900,  24,    820,   224,   1248,  996,   272,   -8,
30
    -916,  -388,  -732,  -104,  -188,  800,   112,   -652,  -320,  -376,  140,   -252,  492,   -168,  44,    -788,
31
    588,   -584,  500,   -228,  12,    680,   272,   -476,  972,   -100,  652,   368,   432,   -196,  -720,  -192,
32
    1000,  -332,  652,   -136,  -552,  -604,  -4,    192,   -220,  -136,  1000,  -52,   372,   -96,   -624,  124,
33
    -24,   396,   540,   -12,   -104,  640,   464,   244,   -208,  -84,   368,   -528,  -740,  248,   -968,  -848,
34
    608,   376,   -60,   -292,  -40,   -156,  252,   -292,  248,   224,   -280,  400,   -244,  244,   -60,   76,
35
    -80,   212,   532,   340,   128,   -36,   824,   -352,  -60,   -264,  -96,   -612,  416,   -704,  220,   -204,
36
    640,   -160,  1220,  -408,  900,   336,   20,    -336,  -96,   -792,  304,   48,    -28,   -1232, -1172, -448,
37
    104,   -292,  -520,  244,   60,    -948,  0,     -708,  268,   108,   356,   -548,  488,   -344,  -136,  488,
38
    -196,  -224,  656,   -236,  -1128, 60,    4,     140,   276,   -676,  -376,  168,   -108,  464,   8,     564,
39
    64,    240,   308,   -300,  -400,  -456,  -136,  56,    120,   -408,  -116,  436,   504,   -232,  328,   844,
40
    -164,  -84,   784,   -168,  232,   -224,  348,   -376,  128,   568,   96,    -1244, -288,  276,   848,   832,
41
    -360,  656,   464,   -384,  -332,  -356,  728,   -388,  160,   -192,  468,   296,   224,   140,   -776,  -100,
42
    280,   4,     196,   44,    -36,   -648,  932,   16,    1428,  28,    528,   808,   772,   20,    268,   88,
43
    -332,  -284,  124,   -384,  -448,  208,   -228,  -1044, -328,  660,   380,   -148,  -300,  588,   240,   540,
44
    28,    136,   -88,   -436,  256,   296,   -1000, 1400,  0,     -48,   1056,  -136,  264,   -528,  -1108, 632,
45
    -484,  -592,  -344,  796,   124,   -668,  -768,  388,   1296,  -232,  -188,  -200,  -288,  -4,    308,   100,
46
    -168,  256,   -500,  204,   -508,  648,   -136,  372,   -272,  -120,  -1004, -552,  -548,  -384,  548,   -296,
47
    428,   -108,  -8,    -912,  -324,  -224,  -88,   -112,  -220,  -100,  996,   -796,  548,   360,   -216,  180,
48
    428,   -200,  -212,  148,   96,    148,   284,   216,   -412,  -320,  120,   -300,  -384,  -604,  -572,  -332,
49
    -8,    -180,  -176,  696,   116,   -88,   628,   76,    44,    -516,  240,   -208,  -40,   100,   -592,  344,
50
    -308,  -452,  -228,  20,    916,   -1752, -136,  -340,  -804,  140,   40,    512,   340,   248,   184,   -492,
51
    896,   -156,  932,   -628,  328,   -688,  -448,  -616,  -752,  -100,  560,   -1020, 180,   -800,  -64,   76,
52
    576,   1068,  396,   660,   552,   -108,  -28,   320,   -628,  312,   -92,   -92,   -472,  268,   16,    560,
53
    516,   -672,  -52,   492,   -100,  260,   384,   284,   292,   304,   -148,  88,    -152,  1012,  1064,  -228,
54
    164,   -376,  -684,  592,   -392,  156,   196,   -524,  -64,   -884,  160,   -176,  636,   648,   404,   -396,
55
    -436,  864,   424,   -728,  988,   -604,  904,   -592,  296,   -224,  536,   -176,  -920,  436,   -48,   1176,
56
    -884,  416,   -776,  -824,  -884,  524,   -548,  -564,  -68,   -164,  -96,   692,   364,   -692,  -1012, -68,
57
    260,   -480,  876,   -1116, 452,   -332,  -352,  892,   -1088, 1220,  -676,  12,    -292,  244,   496,   372,
58
    -32,   280,   200,   112,   -440,  -96,   24,    -644,  -184,  56,    -432,  224,   -980,  272,   -260,  144,
59
    -436,  420,   356,   364,   -528,  76,    172,   -744,  -368,  404,   -752,  -416,  684,   -688,  72,    540,
60
    416,   92,    444,   480,   -72,   -1416, 164,   -1172, -68,   24,    424,   264,   1040,  128,   -912,  -524,
61
    -356,  64,    876,   -12,   4,     -88,   532,   272,   -524,  320,   276,   -508,  940,   24,    -400,  -120,
62
    756,   60,    236,   -412,  100,   376,   -484,  400,   -100,  -740,  -108,  -260,  328,   -268,  224,   -200,
63
    -416,  184,   -604,  -564,  -20,   296,   60,    892,   -888,  60,    164,   68,    -760,  216,   -296,  904,
64
    -336,  -28,   404,   -356,  -568,  -208,  -1480, -512,  296,   328,   -360,  -164,  -1560, -776,  1156,  -428,
65
    164,   -504,  -112,  120,   -216,  -148,  -264,  308,   32,    64,    -72,   72,    116,   176,   -64,   -272,
66
    460,   -536,  -784,  -280,  348,   108,   -752,  -132,  524,   -540,  -776,  116,   -296,  -1196, -288,  -560,
67
    1040,  -472,  116,   -848,  -1116, 116,   636,   696,   284,   -176,  1016,  204,   -864,  -648,  -248,  356,
68
    972,   -584,  -204,  264,   880,   528,   -24,   -184,  116,   448,   -144,  828,   524,   212,   -212,  52,
69
    12,    200,   268,   -488,  -404,  -880,  824,   -672,  -40,   908,   -248,  500,   716,   -576,  492,   -576,
70
    16,    720,   -108,  384,   124,   344,   280,   576,   -500,  252,   104,   -308,  196,   -188,  -8,    1268,
71
    296,   1032,  -1196, 436,   316,   372,   -432,  -200,  -660,  704,   -224,  596,   -132,  268,   32,    -452,
72
    884,   104,   -1008, 424,   -1348, -280,  4,     -1168, 368,   476,   696,   300,   -8,    24,    180,   -592,
73
    -196,  388,   304,   500,   724,   -160,  244,   -84,   272,   -256,  -420,  320,   208,   -144,  -156,  156,
74
    364,   452,   28,    540,   316,   220,   -644,  -248,  464,   72,    360,   32,    -388,  496,   -680,  -48,
75
    208,   -116,  -408,  60,    -604,  -392,  548,   -840,  784,   -460,  656,   -544,  -388,  -264,  908,   -800,
76
    -628,  -612,  -568,  572,   -220,  164,   288,   -16,   -308,  308,   -112,  -636,  -760,  280,   -668,  432,
77
    364,   240,   -196,  604,   340,   384,   196,   592,   -44,   -500,  432,   -580,  -132,  636,   -76,   392,
78
    4,     -412,  540,   508,   328,   -356,  -36,   16,    -220,  -64,   -248,  -60,   24,    -192,  368,   1040,
79
    92,    -24,   -1044, -32,   40,    104,   148,   192,   -136,  -520,  56,    -816,  -224,  732,   392,   356,
80
    212,   -80,   -424,  -1008, -324,  588,   -1496, 576,   460,   -816,  -848,  56,    -580,  -92,   -1372, -112,
81
    -496,  200,   364,   52,    -140,  48,    -48,   -60,   84,    72,    40,    132,   -356,  -268,  -104,  -284,
82
    -404,  732,   -520,  164,   -304,  -540,  120,   328,   -76,   -460,  756,   388,   588,   236,   -436,  -72,
83
    -176,  -404,  -316,  -148,  716,   -604,  404,   -72,   -88,   -888,  -68,   944,   88,    -220,  -344,  960,
84
    472,   460,   -232,  704,   120,   832,   -228,  692,   -508,  132,   -476,  844,   -748,  -364,  -44,   1116,
85
    -1104, -1056, 76,    428,   552,   -692,  60,    356,   96,    -384,  -188,  -612,  -576,  736,   508,   892,
86
    352,   -1132, 504,   -24,   -352,  324,   332,   -600,  -312,  292,   508,   -144,  -8,    484,   48,    284,
87
    -260,  -240,  256,   -100,  -292,  -204,  -44,   472,   -204,  908,   -188,  -1000, -256,  92,    1164,  -392,
88
    564,   356,   652,   -28,   -884,  256,   484,   -192,  760,   -176,  376,   -524,  -452,  -436,  860,   -736,
89
    212,   124,   504,   -476,  468,   76,    -472,  552,   -692,  -944,  -620,  740,   -240,  400,   132,   20,
90
    192,   -196,  264,   -668,  -1012, -60,   296,   -316,  -828,  76,    -156,  284,   -768,  -448,  -832,  148,
91
    248,   652,   616,   1236,  288,   -328,  -400,  -124,  588,   220,   520,   -696,  1032,  768,   -740,  -92,
92
    -272,  296,   448,   -464,  412,   -200,  392,   440,   -200,  264,   -152,  -260,  320,   1032,  216,   320,
93
    -8,    -64,   156,   -1016, 1084,  1172,  536,   484,   -432,  132,   372,   -52,   -256,  84,    116,   -352,
94
    48,    116,   304,   -384,  412,   924,   -300,  528,   628,   180,   648,   44,    -980,  -220,  1320,  48,
95
    332,   748,   524,   -268,  -720,  540,   -276,  564,   -344,  -208,  -196,  436,   896,   88,    -392,  132,
96
    80,    -964,  -288,  568,   56,    -48,   -456,  888,   8,     552,   -156,  -292,  948,   288,   128,   -716,
97
    -292,  1192,  -152,  876,   352,   -600,  -260,  -812,  -468,  -28,   -120,  -32,   -44,   1284,  496,   192,
98
    464,   312,   -76,   -516,  -380,  -456,  -1012, -48,   308,   -156,  36,    492,   -156,  -808,  188,   1652,
99
    68,    -120,  -116,  316,   160,   -140,  352,   808,   -416,  592,   316,   -480,  56,    528,   -204,  -568,
100
    372,   -232,  752,   -344,  744,   -4,    324,   -416,  -600,  768,   268,   -248,  -88,   -132,  -420,  -432,
101
    80,    -288,  404,   -316,  -1216, -588,  520,   -108,  92,    -320,  368,   -480,  -216,  -92,   1688,  -300,
102
    180,   1020,  -176,  820,   -68,   -228,  -260,  436,   -904,  20,    40,    -508,  440,   -736,  312,   332,
103
    204,   760,   -372,  728,   96,    -20,   -632,  -520,  -560,  336,   1076,  -64,   -532,  776,   584,   192,
104
    396,   -728,  -520,  276,   -188,  80,    -52,   -612,  -252,  -48,   648,   212,   -688,  228,   -52,   -260,
105
    428,   -412,  -272,  -404,  180,   816,   -796,  48,    152,   484,   -88,   -216,  988,   696,   188,   -528,
106
    648,   -116,  -180,  316,   476,   12,    -564,  96,    476,   -252,  -364,  -376,  -392,  556,   -256,  -576,
107
    260,   -352,  120,   -16,   -136,  -260,  -492,  72,    556,   660,   580,   616,   772,   436,   424,   -32,
108
    -324,  -1268, 416,   -324,  -80,   920,   160,   228,   724,   32,    -516,  64,    384,   68,    -128,  136,
109
    240,   248,   -204,  -68,   252,   -932,  -120,  -480,  -628,  -84,   192,   852,   -404,  -288,  -132,  204,
110
    100,   168,   -68,   -196,  -868,  460,   1080,  380,   -80,   244,   0,     484,   -888,  64,    184,   352,
111
    600,   460,   164,   604,   -196,  320,   -64,   588,   -184,  228,   12,    372,   48,    -848,  -344,  224,
112
    208,   -200,  484,   128,   -20,   272,   -468,  -840,  384,   256,   -720,  -520,  -464,  -580,  112,   -120,
113
    644,   -356,  -208,  -608,  -528,  704,   560,   -424,  392,   828,   40,    84,    200,   -152,  0,     -144,
114
    584,   280,   -120,  80,    -556,  -972,  -196,  -472,  724,   80,    168,   -32,   88,    160,   -688,  0,
115
    160,   356,   372,   -776,  740,   -128,  676,   -248,  -480,  4,     -364,  96,    544,   232,   -1032, 956,
116
    236,   356,   20,    -40,   300,   24,    -676,  -596,  132,   1120,  -104,  532,   -1096, 568,   648,   444,
117
    508,   380,   188,   -376,  -604,  1488,  424,   24,    756,   -220,  -192,  716,   120,   920,   688,   168,
118
    44,    -460,  568,   284,   1144,  1160,  600,   424,   888,   656,   -356,  -320,  220,   316,   -176,  -724,
119
    -188,  -816,  -628,  -348,  -228,  -380,  1012,  -452,  -660,  736,   928,   404,   -696,  -72,   -268,  -892,
120
    128,   184,   -344,  -780,  360,   336,   400,   344,   428,   548,   -112,  136,   -228,  -216,  -820,  -516,
121
    340,   92,    -136,  116,   -300,  376,   -244,  100,   -316,  -520,  -284,  -12,   824,   164,   -548,  -180,
122
    -128,  116,   -924,  -828,  268,   -368,  -580,  620,   192,   160,   0,     -1676, 1068,  424,   -56,   -360,
123
    468,   -156,  720,   288,   -528,  556,   -364,  548,   -148,  504,   316,   152,   -648,  -620,  -684,  -24,
124
    -376,  -384,  -108,  -920,  -1032, 768,   180,   -264,  -508,  -1268, -260,  -60,   300,   -240,  988,   724,
125
    -376,  -576,  -212,  -736,  556,   192,   1092,  -620,  -880,  376,   -56,   -4,    -216,  -32,   836,   268,
126
    396,   1332,  864,   -600,  100,   56,    -412,  -92,   356,   180,   884,   -468,  -436,  292,   -388,  -804,
127
    -704,  -840,  368,   -348,  140,   -724,  1536,  940,   372,   112,   -372,  436,   -480,  1136,  296,   -32,
128
    -228,  132,   -48,   -220,  868,   -1016, -60,   -1044, -464,  328,   916,   244,   12,    -736,  -296,  360,
129
    468,   -376,  -108,  -92,   788,   368,   -56,   544,   400,   -672,  -420,  728,   16,    320,   44,    -284,
130
    -380,  -796,  488,   132,   204,   -596,  -372,  88,    -152,  -908,  -636,  -572,  -624,  -116,  -692,  -200,
131
    -56,   276,   -88,   484,   -324,  948,   864,   1000,  -456,  -184,  -276,  292,   -296,  156,   676,   320,
132
    160,   908,   -84,   -1236, -288,  -116,  260,   -372,  -644,  732,   -756,  -96,   84,    344,   -520,  348,
133
    -688,  240,   -84,   216,   -1044, -136,  -676,  -396,  -1500, 960,   -40,   176,   168,   1516,  420,   -504,
134
    -344,  -364,  -360,  1216,  -940,  -380,  -212,  252,   -660,  -708,  484,   -444,  -152,  928,   -120,  1112,
135
    476,   -260,  560,   -148,  -344,  108,   -196,  228,   -288,  504,   560,   -328,  -88,   288,   -1008, 460,
136
    -228,  468,   -836,  -196,  76,    388,   232,   412,   -1168, -716,  -644,  756,   -172,  -356,  -504,  116,
137
    432,   528,   48,    476,   -168,  -608,  448,   160,   -532,  -272,  28,    -676,  -12,   828,   980,   456,
138
    520,   104,   -104,  256,   -344,  -4,    -28,   -368,  -52,   -524,  -572,  -556,  -200,  768,   1124,  -208,
139
    -512,  176,   232,   248,   -148,  -888,  604,   -600,  -304,  804,   -156,  -212,  488,   -192,  -804,  -256,
140
    368,   -360,  -916,  -328,  228,   -240,  -448,  -472,  856,   -556,  -364,  572,   -12,   -156,  -368,  -340,
141
    432,   252,   -752,  -152,  288,   268,   -580,  -848,  -592,  108,   -76,   244,   312,   -716,  592,   -80,
142
    436,   360,   4,     -248,  160,   516,   584,   732,   44,    -468,  -280,  -292,  -156,  -588,  28,    308,
143
    912,   24,    124,   156,   180,   -252,  944,   -924,  -772,  -520,  -428,  -624,  300,   -212,  -1144, 32,
144
    -724,  800,   -1128, -212,  -1288, -848,  180,   -416,  440,   192,   -576,  -792,  -76,   -1080, 80,    -532,
145
    -352,  -132,  380,   -820,  148,   1112,  128,   164,   456,   700,   -924,  144,   -668,  -384,  648,   -832,
146
    508,   552,   -52,   -100,  -656,  208,   -568,  748,   -88,   680,   232,   300,   192,   -408,  -1012, -152,
147
    -252,  -268,  272,   -876,  -664,  -648,  -332,  -136,  16,    12,    1152,  -28,   332,   -536,  320,   -672,
148
    -460,  -316,  532,   -260,  228,   -40,   1052,  -816,  180,   88,    -496,  -556,  -672,  -368,  428,   92,
149
    356,   404,   -408,  252,   196,   -176,  -556,  792,   268,   32,    372,   40,    96,    -332,  328,   120,
150
    372,   -900,  -40,   472,   -264,  -592,  952,   128,   656,   112,   664,   -232,  420,   4,     -344,  -464,
151
    556,   244,   -416,  -32,   252,   0,     -412,  188,   -696,  508,   -476,  324,   -1096, 656,   -312,  560,
152
    264,   -136,  304,   160,   -64,   -580,  248,   336,   -720,  560,   -348,  -288,  -276,  -196,  -500,  852,
153
    -544,  -236,  -1128, -992,  -776,  116,   56,    52,    860,   884,   212,   -12,   168,   1020,  512,   -552,
154
    924,   -148,  716,   188,   164,   -340,  -520,  -184,  880,   -152,  -680,  -208,  -1156, -300,  -528,  -472,
155
    364,   100,   -744,  -1056, -32,   540,   280,   144,   -676,  -32,   -232,  -280,  -224,  96,    568,   -76,
156
    172,   148,   148,   104,   32,    -296,  -32,   788,   -80,   32,    -16,   280,   288,   944,   428,   -484};
157
158
// Number of random bits drawn per Gaussian table lookup: the value passed to
// get_random_number() when indexing gaussian_sequence (2^11 = 2048 entries).
static const int32_t gauss_bits = 11;
159
160
// Sub-block dimensions in samples. The *_size_y values are used in init_arrays
// to size the column buffers.
static int32_t luma_subblock_size_y = 32;
161
static int32_t luma_subblock_size_x = 32;
162
163
static int32_t chroma_subblock_size_y = 16;
164
static int32_t chroma_subblock_size_x = 16;
165
166
// NOTE(review): presumably the limited ("legal") range sample bounds at 8-bit
// depth; these are not referenced in the visible part of the file - confirm at
// the use site.
static const int32_t min_luma_legal_range = 16;
167
static const int32_t max_luma_legal_range = 235;
168
169
static const int32_t min_chroma_legal_range = 16;
170
static const int32_t max_chroma_legal_range = 240;
171
172
// Per-plane 256-entry scaling lookup tables; zeroed at the start of init_arrays.
// NOTE(review): presumably populated through init_scaling_function - the call
// site is outside the visible section.
static int32_t scaling_lut_y[256];
173
static int32_t scaling_lut_cb[256];
174
static int32_t scaling_lut_cr[256];
175
176
// grain_min / grain_max are the clamp bounds applied to generated grain samples
// in the grain block generators. None of the three is assigned in the visible
// part of the file.
static int32_t grain_center;
177
static int32_t grain_min;
178
static int32_t grain_max;
179
180
// State of the 16-bit shift register advanced by get_random_number() and
// re-seeded by init_random_generator().
static uint16_t random_register = 0; // random number generator register
181
182
static void init_arrays(const AomFilmGrain* params, int32_t luma_stride, int32_t chroma_stride,
183
                        int32_t*** pred_pos_luma_p, int32_t*** pred_pos_chroma_p, int32_t** luma_grain_block,
184
                        int32_t** cb_grain_block, int32_t** cr_grain_block, int32_t** y_line_buf, int32_t** cb_line_buf,
185
                        int32_t** cr_line_buf, int32_t** y_col_buf, int32_t** cb_col_buf, int32_t** cr_col_buf,
186
                        int32_t luma_grain_samples, int32_t chroma_grain_samples, int32_t chroma_subsamp_y,
187
0
                        int32_t chroma_subsamp_x) {
188
0
    memset(scaling_lut_y, 0, sizeof(*scaling_lut_y) * 256);
189
0
    memset(scaling_lut_cb, 0, sizeof(*scaling_lut_cb) * 256);
190
0
    memset(scaling_lut_cr, 0, sizeof(*scaling_lut_cr) * 256);
191
192
0
    int32_t num_pos_luma   = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
193
0
    int32_t num_pos_chroma = num_pos_luma;
194
0
    if (params->num_y_points > 0) {
195
0
        ++num_pos_chroma;
196
0
    }
197
198
0
    int32_t** pred_pos_luma;
199
0
    int32_t** pred_pos_chroma;
200
201
0
    pred_pos_luma = (int32_t**)malloc(sizeof(*pred_pos_luma) * num_pos_luma);
202
0
    ASSERT(pred_pos_luma != NULL);
203
0
    for (int32_t row = 0; row < num_pos_luma; row++) {
204
0
        pred_pos_luma[row] = (int32_t*)malloc(sizeof(**pred_pos_luma) * 3);
205
0
        ASSERT(pred_pos_luma[row]);
206
0
    }
207
208
0
    pred_pos_chroma = (int32_t**)malloc(sizeof(*pred_pos_chroma) * num_pos_chroma);
209
0
    ASSERT(pred_pos_chroma != NULL);
210
0
    for (int32_t row = 0; row < num_pos_chroma; row++) {
211
0
        pred_pos_chroma[row] = (int32_t*)malloc(sizeof(**pred_pos_chroma) * 3);
212
0
        ASSERT(pred_pos_chroma[row]);
213
0
    }
214
215
0
    int32_t pos_ar_index = 0;
216
217
0
    for (int32_t row = -params->ar_coeff_lag; row < 0; row++) {
218
0
        for (int32_t col = -params->ar_coeff_lag; col < params->ar_coeff_lag + 1; col++) {
219
0
            pred_pos_luma[pos_ar_index][0] = row;
220
0
            pred_pos_luma[pos_ar_index][1] = col;
221
0
            pred_pos_luma[pos_ar_index][2] = 0;
222
223
0
            pred_pos_chroma[pos_ar_index][0] = row;
224
0
            pred_pos_chroma[pos_ar_index][1] = col;
225
0
            pred_pos_chroma[pos_ar_index][2] = 0;
226
0
            ++pos_ar_index;
227
0
        }
228
0
    }
229
230
0
    for (int32_t col = -params->ar_coeff_lag; col < 0; col++) {
231
0
        pred_pos_luma[pos_ar_index][0] = 0;
232
0
        pred_pos_luma[pos_ar_index][1] = col;
233
0
        pred_pos_luma[pos_ar_index][2] = 0;
234
235
0
        pred_pos_chroma[pos_ar_index][0] = 0;
236
0
        pred_pos_chroma[pos_ar_index][1] = col;
237
0
        pred_pos_chroma[pos_ar_index][2] = 0;
238
239
0
        ++pos_ar_index;
240
0
    }
241
242
0
    if (params->num_y_points > 0) {
243
0
        pred_pos_chroma[pos_ar_index][0] = 0;
244
0
        pred_pos_chroma[pos_ar_index][1] = 0;
245
0
        pred_pos_chroma[pos_ar_index][2] = 1;
246
0
    }
247
248
0
    *pred_pos_luma_p   = pred_pos_luma;
249
0
    *pred_pos_chroma_p = pred_pos_chroma;
250
251
0
    *y_line_buf  = (int32_t*)malloc(sizeof(**y_line_buf) * luma_stride * 2);
252
0
    *cb_line_buf = (int32_t*)malloc(sizeof(**cb_line_buf) * chroma_stride * (2 >> chroma_subsamp_y));
253
0
    *cr_line_buf = (int32_t*)malloc(sizeof(**cr_line_buf) * chroma_stride * (2 >> chroma_subsamp_y));
254
255
0
    *y_col_buf  = (int32_t*)malloc(sizeof(**y_col_buf) * (luma_subblock_size_y + 2) * 2);
256
0
    *cb_col_buf = (int32_t*)malloc(sizeof(**cb_col_buf) * (chroma_subblock_size_y + (2 >> chroma_subsamp_y)) *
257
0
                                   (2 >> chroma_subsamp_x));
258
0
    *cr_col_buf = (int32_t*)malloc(sizeof(**cr_col_buf) * (chroma_subblock_size_y + (2 >> chroma_subsamp_y)) *
259
0
                                   (2 >> chroma_subsamp_x));
260
261
0
    *luma_grain_block = (int32_t*)malloc(sizeof(**luma_grain_block) * luma_grain_samples);
262
0
    *cb_grain_block   = (int32_t*)malloc(sizeof(**cb_grain_block) * chroma_grain_samples);
263
0
    *cr_grain_block   = (int32_t*)malloc(sizeof(**cr_grain_block) * chroma_grain_samples);
264
0
}
265
266
static void dealloc_arrays(const AomFilmGrain* params, int32_t*** pred_pos_luma, int32_t*** pred_pos_chroma,
267
                           int32_t** luma_grain_block, int32_t** cb_grain_block, int32_t** cr_grain_block,
268
                           int32_t** y_line_buf, int32_t** cb_line_buf, int32_t** cr_line_buf, int32_t** y_col_buf,
269
0
                           int32_t** cb_col_buf, int32_t** cr_col_buf) {
270
0
    int32_t num_pos_luma   = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
271
0
    int32_t num_pos_chroma = num_pos_luma;
272
0
    if (params->num_y_points > 0) {
273
0
        ++num_pos_chroma;
274
0
    }
275
276
0
    for (int32_t row = 0; row < num_pos_luma; row++) {
277
0
        free((*pred_pos_luma)[row]);
278
0
    }
279
0
    free(*pred_pos_luma);
280
281
0
    for (int32_t row = 0; row < num_pos_chroma; row++) {
282
0
        free((*pred_pos_chroma)[row]);
283
0
    }
284
0
    free((*pred_pos_chroma));
285
286
0
    free(*y_line_buf);
287
288
0
    free(*cb_line_buf);
289
290
0
    free(*cr_line_buf);
291
292
0
    free(*y_col_buf);
293
294
0
    free(*cb_col_buf);
295
296
0
    free(*cr_col_buf);
297
298
0
    free(*luma_grain_block);
299
300
0
    free(*cb_grain_block);
301
302
0
    free(*cr_grain_block);
303
0
}
304
305
// get a number between 0 and 2^bits - 1
306
0
static INLINE int32_t get_random_number(int32_t bits) {
    // Advance the 16-bit shift register by one step: the new top bit is the XOR
    // of register bits 0, 1, 3 and 12.
    const uint16_t state    = random_register;
    const uint16_t feedback = (state ^ (state >> 1) ^ (state >> 3) ^ (state >> 12)) & 1;

    random_register = (uint16_t)((state >> 1) | (feedback << 15));

    // Hand back the `bits` most significant bits of the updated register.
    const int32_t mask = (1 << bits) - 1;
    return (random_register >> (16 - bits)) & mask;
}
312
313
0
static void init_random_generator(int32_t luma_line, uint16_t seed) {
314
    // same for the picture
315
316
0
    uint16_t msb = (seed >> 8) & 255;
317
0
    uint16_t lsb = seed & 255;
318
319
0
    random_register = (msb << 8) + lsb;
320
321
    //  changes for each row
322
0
    int32_t luma_num = luma_line >> 5;
323
324
0
    random_register ^= ((luma_num * 37 + 178) & 255) << 8;
325
0
    random_register ^= ((luma_num * 173 + 105) & 255);
326
0
}
327
328
static void generate_luma_grain_block(const AomFilmGrain* params, int32_t** pred_pos_luma, int32_t* luma_grain_block,
329
                                      int32_t luma_block_size_y, int32_t luma_block_size_x, int32_t luma_grain_stride,
330
0
                                      int32_t left_pad, int32_t top_pad, int32_t right_pad, int32_t bottom_pad) {
331
0
    if (params->num_y_points == 0) {
332
0
        return;
333
0
    }
334
335
0
    int32_t bit_depth       = params->bit_depth;
336
0
    int32_t gauss_sec_shift = 12 - bit_depth + params->grain_scale_shift;
337
338
0
    int32_t num_pos_luma    = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
339
0
    int32_t rounding_offset = (1 << (params->ar_coeff_shift - 1));
340
341
0
    for (int32_t i = 0; i < luma_block_size_y; i++) {
342
0
        for (int32_t j = 0; j < luma_block_size_x; j++) {
343
0
            luma_grain_block[i * luma_grain_stride + j] = (gaussian_sequence[get_random_number(gauss_bits)] +
344
0
                                                           ((1 << gauss_sec_shift) >> 1)) >>
345
0
                gauss_sec_shift;
346
0
        }
347
0
    }
348
349
0
    for (int32_t i = top_pad; i < luma_block_size_y - bottom_pad; i++) {
350
0
        for (int32_t j = left_pad; j < luma_block_size_x - right_pad; j++) {
351
0
            int32_t wsum = 0;
352
0
            for (int32_t pos = 0; pos < num_pos_luma; pos++) {
353
0
                wsum = wsum +
354
0
                    params->ar_coeffs_y[pos] *
355
0
                        luma_grain_block[(i + pred_pos_luma[pos][0]) * luma_grain_stride + j + pred_pos_luma[pos][1]];
356
0
            }
357
0
            luma_grain_block[i * luma_grain_stride + j] = clamp(
358
0
                luma_grain_block[i * luma_grain_stride + j] + ((wsum + rounding_offset) >> params->ar_coeff_shift),
359
0
                grain_min,
360
0
                grain_max);
361
0
        }
362
0
    }
363
0
}
364
365
static void generate_chroma_grain_blocks(const AomFilmGrain* params,
366
                                         //                                  int32_t** pred_pos_luma,
367
                                         int32_t** pred_pos_chroma, int32_t* luma_grain_block, int32_t* cb_grain_block,
368
                                         int32_t* cr_grain_block, int32_t luma_grain_stride,
369
                                         int32_t chroma_block_size_y, int32_t chroma_block_size_x,
370
                                         int32_t chroma_grain_stride, int32_t left_pad, int32_t top_pad,
371
                                         int32_t right_pad, int32_t bottom_pad, int32_t chroma_subsamp_y,
372
0
                                         int32_t chroma_subsamp_x) {
373
0
    int32_t bit_depth       = params->bit_depth;
374
0
    int32_t gauss_sec_shift = 12 - bit_depth + params->grain_scale_shift;
375
376
0
    int32_t num_pos_chroma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
377
0
    if (params->num_y_points > 0) {
378
0
        ++num_pos_chroma;
379
0
    }
380
0
    int32_t rounding_offset = (1 << (params->ar_coeff_shift - 1));
381
382
0
    int chroma_grain_block_size = chroma_block_size_y * chroma_grain_stride;
383
384
0
    if (params->num_cb_points || params->chroma_scaling_from_luma) {
385
0
        init_random_generator(7 << 5, params->random_seed);
386
387
0
        for (int32_t i = 0; i < chroma_block_size_y; i++) {
388
0
            for (int32_t j = 0; j < chroma_block_size_x; j++) {
389
0
                cb_grain_block[i * chroma_grain_stride + j] = (gaussian_sequence[get_random_number(gauss_bits)] +
390
0
                                                               ((1 << gauss_sec_shift) >> 1)) >>
391
0
                    gauss_sec_shift;
392
0
            }
393
0
        }
394
0
    } else {
395
0
        memset(cb_grain_block, 0, sizeof(*cb_grain_block) * chroma_grain_block_size);
396
0
    }
397
0
    if (params->num_cr_points || params->chroma_scaling_from_luma) {
398
0
        init_random_generator(11 << 5, params->random_seed);
399
400
0
        for (int32_t i = 0; i < chroma_block_size_y; i++) {
401
0
            for (int32_t j = 0; j < chroma_block_size_x; j++) {
402
0
                cr_grain_block[i * chroma_grain_stride + j] = (gaussian_sequence[get_random_number(gauss_bits)] +
403
0
                                                               ((1 << gauss_sec_shift) >> 1)) >>
404
0
                    gauss_sec_shift;
405
0
            }
406
0
        }
407
0
    } else {
408
0
        memset(cr_grain_block, 0, sizeof(*cr_grain_block) * chroma_grain_block_size);
409
0
    }
410
411
0
    for (int32_t i = top_pad; i < chroma_block_size_y - bottom_pad; i++) {
412
0
        for (int32_t j = left_pad; j < chroma_block_size_x - right_pad; j++) {
413
0
            int32_t wsum_cb = 0;
414
0
            int32_t wsum_cr = 0;
415
0
            for (int32_t pos = 0; pos < num_pos_chroma; pos++) {
416
0
                if (pred_pos_chroma[pos][2] == 0) {
417
0
                    wsum_cb = wsum_cb +
418
0
                        params->ar_coeffs_cb[pos] *
419
0
                            cb_grain_block[(i + pred_pos_chroma[pos][0]) * chroma_grain_stride + j +
420
0
                                           pred_pos_chroma[pos][1]];
421
0
                    wsum_cr = wsum_cr +
422
0
                        params->ar_coeffs_cr[pos] *
423
0
                            cr_grain_block[(i + pred_pos_chroma[pos][0]) * chroma_grain_stride + j +
424
0
                                           pred_pos_chroma[pos][1]];
425
0
                } else if (pred_pos_chroma[pos][2] == 1) {
426
0
                    int32_t av_luma      = 0;
427
0
                    int32_t luma_coord_y = ((i - top_pad) << chroma_subsamp_y) + top_pad;
428
0
                    int32_t luma_coord_x = ((j - left_pad) << chroma_subsamp_x) + left_pad;
429
430
0
                    for (int32_t k = luma_coord_y; k < luma_coord_y + chroma_subsamp_y + 1; k++) {
431
0
                        for (int32_t l = luma_coord_x; l < luma_coord_x + chroma_subsamp_x + 1; l++) {
432
0
                            av_luma += luma_grain_block[k * luma_grain_stride + l];
433
0
                        }
434
0
                    }
435
436
0
                    av_luma = (av_luma + ((1 << (chroma_subsamp_y + chroma_subsamp_x)) >> 1)) >>
437
0
                        (chroma_subsamp_y + chroma_subsamp_x);
438
439
0
                    wsum_cb = wsum_cb + params->ar_coeffs_cb[pos] * av_luma;
440
0
                    wsum_cr = wsum_cr + params->ar_coeffs_cr[pos] * av_luma;
441
0
                } else {
442
0
                    SVT_ERROR(
443
0
                        "Grain synthesis: prediction between two chroma components is "
444
0
                        "not supported!");
445
0
                    exit(1);
446
0
                }
447
0
            }
448
0
            if (params->num_cb_points || params->chroma_scaling_from_luma) {
449
0
                cb_grain_block[i * chroma_grain_stride + j] = clamp(
450
0
                    cb_grain_block[i * chroma_grain_stride + j] +
451
0
                        ((wsum_cb + rounding_offset) >> params->ar_coeff_shift),
452
0
                    grain_min,
453
0
                    grain_max);
454
0
            }
455
0
            if (params->num_cr_points || params->chroma_scaling_from_luma) {
456
0
                cr_grain_block[i * chroma_grain_stride + j] = clamp(
457
0
                    cr_grain_block[i * chroma_grain_stride + j] +
458
0
                        ((wsum_cr + rounding_offset) >> params->ar_coeff_shift),
459
0
                    grain_min,
460
0
                    grain_max);
461
0
            }
462
0
        }
463
0
    }
464
0
}
465
466
0
/*
 * Builds a 256-entry piecewise-linear scaling LUT from `num_points` control
 * points.
 *
 *   scaling_points[i][0] - x coordinate (LUT index) of control point i
 *   scaling_points[i][1] - LUT value at that coordinate
 *   scaling_lut          - output table with at least 256 entries
 *
 * Entries before the first point take the first point's value, entries from the
 * last point onward take the last point's value, and entries in between are
 * linearly interpolated in 16.16 fixed point. With no points (num_points <= 0)
 * the table is left untouched.
 *
 * Segments whose x coordinates do not increase are skipped: a zero-width
 * segment would otherwise divide by zero, and a negative-width one never wrote
 * anything to begin with.
 */
static void init_scaling_function(const int32_t scaling_points[][2], int32_t num_points, int32_t scaling_lut[]) {
    if (num_points <= 0) {
        return;
    }

    // Flat extension to the left of the first control point.
    for (int32_t i = 0; i < scaling_points[0][0]; i++) {
        scaling_lut[i] = scaling_points[0][1];
    }

    for (int32_t point = 0; point < num_points - 1; point++) {
        const int64_t delta_y = scaling_points[point + 1][1] - scaling_points[point][1];
        const int64_t delta_x = scaling_points[point + 1][0] - scaling_points[point][0];

        if (delta_x <= 0) {
            continue; // degenerate segment: nothing to interpolate, and no division by zero
        }

        // Slope in 16.16 fixed point, using a rounded reciprocal of the segment width.
        const int64_t delta = delta_y * ((65536 + (delta_x >> 1)) / delta_x);

        for (int32_t x = 0; x < delta_x; x++) {
            scaling_lut[scaling_points[point][0] + x] = scaling_points[point][1] + (int32_t)((x * delta + 32768) >> 16);
        }
    }

    // Flat extension from the last control point to the end of the table.
    for (int32_t i = scaling_points[num_points - 1][0]; i < 256; i++) {
        scaling_lut[i] = scaling_points[num_points - 1][1];
    }
}
490
491
// Looks up a scaling value for a sample. At 8 bits the LUT entry selected by
// the sample is returned directly; at higher bit depths the upper 8 bits of
// the sample select the entry and the remaining low bits are used to
// interpolate linearly (with rounding) towards the next entry.
static int32_t scale_lut(int32_t* scaling_lut, int32_t index, int32_t bit_depth) {
    const int32_t extra_bits = bit_depth - 8;
    const int32_t base       = index >> extra_bits;

    // No fractional bits at 8-bit depth, and entry 255 has no right-hand neighbour.
    if (extra_bits == 0 || base == 255) {
        return scaling_lut[base];
    }

    const int32_t frac  = index & ((1 << extra_bits) - 1);
    const int32_t slope = scaling_lut[base + 1] - scaling_lut[base];
    const int32_t half  = 1 << (extra_bits - 1);

    return scaling_lut[base] + ((slope * frac + half) >> extra_bits);
}
505
506
static void add_noise_to_block(const AomFilmGrain* params, uint8_t* luma, uint8_t* cb, uint8_t* cr, int32_t luma_stride,
507
                               int32_t chroma_stride, int32_t* luma_grain, int32_t* cb_grain, int32_t* cr_grain,
508
                               int32_t luma_grain_stride, int32_t chroma_grain_stride, int32_t half_luma_height,
509
                               int32_t half_luma_width, int32_t bit_depth, int32_t chroma_subsamp_y,
510
0
                               int32_t chroma_subsamp_x) {
511
0
    int32_t cb_mult      = params->cb_mult - 128; // fixed scale
512
0
    int32_t cb_luma_mult = params->cb_luma_mult - 128; // fixed scale
513
0
    int32_t cb_offset    = params->cb_offset - 256;
514
515
0
    int32_t cr_mult      = params->cr_mult - 128; // fixed scale
516
0
    int32_t cr_luma_mult = params->cr_luma_mult - 128; // fixed scale
517
0
    int32_t cr_offset    = params->cr_offset - 256;
518
519
0
    int32_t rounding_offset = (1 << (params->scaling_shift - 1));
520
521
0
    int32_t apply_y  = params->num_y_points > 0 ? 1 : 0;
522
0
    int32_t apply_cb = (params->num_cb_points > 0 || params->chroma_scaling_from_luma) ? 1 : 0;
523
0
    int32_t apply_cr = (params->num_cr_points > 0 || params->chroma_scaling_from_luma) ? 1 : 0;
524
525
0
    if (params->chroma_scaling_from_luma) {
526
0
        cb_mult      = 0; // fixed scale
527
0
        cb_luma_mult = 64; // fixed scale
528
0
        cb_offset    = 0;
529
530
0
        cr_mult      = 0; // fixed scale
531
0
        cr_luma_mult = 64; // fixed scale
532
0
        cr_offset    = 0;
533
0
    }
534
535
0
    int32_t min_luma, max_luma, min_chroma, max_chroma;
536
537
0
    if (params->clip_to_restricted_range) {
538
0
        min_luma = min_luma_legal_range;
539
0
        max_luma = max_luma_legal_range;
540
541
0
        min_chroma = min_chroma_legal_range;
542
0
        max_chroma = max_chroma_legal_range;
543
0
    } else {
544
0
        min_luma = min_chroma = 0;
545
0
        max_luma = max_chroma = 255;
546
0
    }
547
548
0
    for (int32_t i = 0; i < (half_luma_height << (1 - chroma_subsamp_y)); i++) {
549
0
        for (int32_t j = 0; j < (half_luma_width << (1 - chroma_subsamp_x)); j++) {
550
0
            int32_t average_luma = 0;
551
0
            if (chroma_subsamp_x) {
552
0
                average_luma = (luma[(i << chroma_subsamp_y) * luma_stride + (j << chroma_subsamp_x)] +
553
0
                                luma[(i << chroma_subsamp_y) * luma_stride + (j << chroma_subsamp_x) + 1] + 1) >>
554
0
                    1;
555
0
            } else {
556
0
                average_luma = luma[(i << chroma_subsamp_y) * luma_stride + j];
557
0
            }
558
0
            if (apply_cb) {
559
0
                cb[i * chroma_stride + j] = clamp(
560
0
                    cb[i * chroma_stride + j] +
561
0
                        ((scale_lut(scaling_lut_cb,
562
0
                                    clamp(((average_luma * cb_luma_mult + cb_mult * cb[i * chroma_stride + j]) >> 6) +
563
0
                                              cb_offset,
564
0
                                          0,
565
0
                                          (256 << (bit_depth - 8)) - 1),
566
0
                                    8) *
567
0
                              cb_grain[i * chroma_grain_stride + j] +
568
0
                          rounding_offset) >>
569
0
                         params->scaling_shift),
570
0
                    min_chroma,
571
0
                    max_chroma);
572
0
            }
573
574
0
            if (apply_cr) {
575
0
                cr[i * chroma_stride + j] = clamp(
576
0
                    cr[i * chroma_stride + j] +
577
0
                        ((scale_lut(scaling_lut_cr,
578
0
                                    clamp(((average_luma * cr_luma_mult + cr_mult * cr[i * chroma_stride + j]) >> 6) +
579
0
                                              cr_offset,
580
0
                                          0,
581
0
                                          (256 << (bit_depth - 8)) - 1),
582
0
                                    8) *
583
0
                              cr_grain[i * chroma_grain_stride + j] +
584
0
                          rounding_offset) >>
585
0
                         params->scaling_shift),
586
0
                    min_chroma,
587
0
                    max_chroma);
588
0
            }
589
0
        }
590
0
    }
591
592
0
    if (apply_y) {
593
0
        for (int32_t i = 0; i < (half_luma_height << 1); i++) {
594
0
            for (int32_t j = 0; j < (half_luma_width << 1); j++) {
595
0
                luma[i * luma_stride + j] = clamp(luma[i * luma_stride + j] +
596
0
                                                      ((scale_lut(scaling_lut_y, luma[i * luma_stride + j], 8) *
597
0
                                                            luma_grain[i * luma_grain_stride + j] +
598
0
                                                        rounding_offset) >>
599
0
                                                       params->scaling_shift),
600
0
                                                  min_luma,
601
0
                                                  max_luma);
602
0
            }
603
0
        }
604
0
    }
605
0
}
606
607
static void add_noise_to_block_hbd(const AomFilmGrain* params, uint16_t* luma, uint16_t* cb, uint16_t* cr,
608
                                   int32_t luma_stride, int32_t chroma_stride, int32_t* luma_grain, int32_t* cb_grain,
609
                                   int32_t* cr_grain, int32_t luma_grain_stride, int32_t chroma_grain_stride,
610
                                   int32_t half_luma_height, int32_t half_luma_width, int32_t bit_depth,
611
0
                                   int32_t chroma_subsamp_y, int32_t chroma_subsamp_x) {
612
0
    int32_t cb_mult      = params->cb_mult - 128; // fixed scale
613
0
    int32_t cb_luma_mult = params->cb_luma_mult - 128; // fixed scale
614
    // offset value depends on the bit depth
615
0
    int32_t cb_offset = (params->cb_offset << (bit_depth - 8)) - (1 << bit_depth);
616
617
0
    int32_t cr_mult      = params->cr_mult - 128; // fixed scale
618
0
    int32_t cr_luma_mult = params->cr_luma_mult - 128; // fixed scale
619
    // offset value depends on the bit depth
620
0
    int32_t cr_offset = (params->cr_offset << (bit_depth - 8)) - (1 << bit_depth);
621
622
0
    int32_t rounding_offset = (1 << (params->scaling_shift - 1));
623
624
0
    int32_t apply_y  = params->num_y_points > 0 ? 1 : 0;
625
0
    int32_t apply_cb = params->num_cb_points > 0 ? 1 : 0;
626
0
    int32_t apply_cr = params->num_cr_points > 0 ? 1 : 0;
627
628
0
    if (params->chroma_scaling_from_luma) {
629
0
        cb_mult      = 0; // fixed scale
630
0
        cb_luma_mult = 64; // fixed scale
631
0
        cb_offset    = 0;
632
633
0
        cr_mult      = 0; // fixed scale
634
0
        cr_luma_mult = 64; // fixed scale
635
0
        cr_offset    = 0;
636
0
    }
637
638
0
    int32_t min_luma, max_luma, min_chroma, max_chroma;
639
640
0
    if (params->clip_to_restricted_range) {
641
0
        min_luma = min_luma_legal_range << (bit_depth - 8);
642
0
        max_luma = max_luma_legal_range << (bit_depth - 8);
643
644
0
        min_chroma = min_chroma_legal_range << (bit_depth - 8);
645
0
        max_chroma = max_chroma_legal_range << (bit_depth - 8);
646
0
    } else {
647
0
        min_luma = min_chroma = 0;
648
0
        max_luma = max_chroma = (256 << (bit_depth - 8)) - 1;
649
0
    }
650
651
0
    for (int32_t i = 0; i < (half_luma_height << (1 - chroma_subsamp_y)); i++) {
652
0
        for (int32_t j = 0; j < (half_luma_width << (1 - chroma_subsamp_x)); j++) {
653
0
            int32_t average_luma = 0;
654
0
            if (chroma_subsamp_x) {
655
0
                average_luma = (luma[(i << chroma_subsamp_y) * luma_stride + (j << chroma_subsamp_x)] +
656
0
                                luma[(i << chroma_subsamp_y) * luma_stride + (j << chroma_subsamp_x) + 1] + 1) >>
657
0
                    1;
658
0
            } else {
659
0
                average_luma = luma[(i << chroma_subsamp_y) * luma_stride + j];
660
0
            }
661
0
            if (apply_cb) {
662
0
                cb[i * chroma_stride + j] = clamp(
663
0
                    cb[i * chroma_stride + j] +
664
0
                        ((scale_lut(scaling_lut_cb,
665
0
                                    clamp(((average_luma * cb_luma_mult + cb_mult * cb[i * chroma_stride + j]) >> 6) +
666
0
                                              cb_offset,
667
0
                                          0,
668
0
                                          (256 << (bit_depth - 8)) - 1),
669
0
                                    bit_depth) *
670
0
                              cb_grain[i * chroma_grain_stride + j] +
671
0
                          rounding_offset) >>
672
0
                         params->scaling_shift),
673
0
                    min_chroma,
674
0
                    max_chroma);
675
0
            }
676
0
            if (apply_cr) {
677
0
                cr[i * chroma_stride + j] = clamp(
678
0
                    cr[i * chroma_stride + j] +
679
0
                        ((scale_lut(scaling_lut_cr,
680
0
                                    clamp(((average_luma * cr_luma_mult + cr_mult * cr[i * chroma_stride + j]) >> 6) +
681
0
                                              cr_offset,
682
0
                                          0,
683
0
                                          (256 << (bit_depth - 8)) - 1),
684
0
                                    bit_depth) *
685
0
                              cr_grain[i * chroma_grain_stride + j] +
686
0
                          rounding_offset) >>
687
0
                         params->scaling_shift),
688
0
                    min_chroma,
689
0
                    max_chroma);
690
0
            }
691
0
        }
692
0
    }
693
694
0
    if (apply_y) {
695
0
        for (int32_t i = 0; i < (half_luma_height << 1); i++) {
696
0
            for (int32_t j = 0; j < (half_luma_width << 1); j++) {
697
0
                luma[i * luma_stride + j] = clamp(luma[i * luma_stride + j] +
698
0
                                                      ((scale_lut(scaling_lut_y, luma[i * luma_stride + j], bit_depth) *
699
0
                                                            luma_grain[i * luma_grain_stride + j] +
700
0
                                                        rounding_offset) >>
701
0
                                                       params->scaling_shift),
702
0
                                                  min_luma,
703
0
                                                  max_luma);
704
0
            }
705
0
        }
706
0
    }
707
0
}
708
709
0
int32_t svt_aom_film_grain_params_equal(const AomFilmGrain* pars_a, const AomFilmGrain* pars_b) {
710
0
    if (pars_a->apply_grain != pars_b->apply_grain) {
711
0
        return 0;
712
0
    }
713
0
    if (pars_a->overlap_flag != pars_b->overlap_flag) {
714
0
        return 0;
715
0
    }
716
0
    if (pars_a->clip_to_restricted_range != pars_b->clip_to_restricted_range) {
717
0
        return 0;
718
0
    }
719
0
    if (pars_a->chroma_scaling_from_luma != pars_b->chroma_scaling_from_luma) {
720
0
        return 0;
721
0
    }
722
0
    if (pars_a->grain_scale_shift != pars_b->grain_scale_shift) {
723
0
        return 0;
724
0
    }
725
0
    if (pars_a->ar_coeff_shift != pars_b->ar_coeff_shift) {
726
0
        return 0;
727
0
    }
728
0
    if (pars_a->cb_mult != pars_b->cb_mult) {
729
0
        return 0;
730
0
    }
731
0
    if (pars_a->cb_luma_mult != pars_b->cb_luma_mult) {
732
0
        return 0;
733
0
    }
734
0
    if (pars_a->cb_offset != pars_b->cb_offset) {
735
0
        return 0;
736
0
    }
737
0
    if (pars_a->cr_mult != pars_b->cr_mult) {
738
0
        return 0;
739
0
    }
740
0
    if (pars_a->cr_luma_mult != pars_b->cr_luma_mult) {
741
0
        return 0;
742
0
    }
743
0
    if (pars_a->cr_offset != pars_b->cr_offset) {
744
0
        return 0;
745
0
    }
746
747
0
    if (pars_a->scaling_shift != pars_b->scaling_shift) {
748
0
        return 0;
749
0
    }
750
0
    if (pars_a->ar_coeff_lag != pars_b->ar_coeff_lag) {
751
0
        return 0;
752
0
    }
753
754
0
    if (pars_a->num_y_points != pars_b->num_y_points) {
755
0
        return 0;
756
0
    }
757
758
0
    if (pars_a->num_cb_points != pars_b->num_cb_points) {
759
0
        return 0;
760
0
    }
761
762
0
    if (pars_a->num_cr_points != pars_b->num_cr_points) {
763
0
        return 0;
764
0
    }
765
766
0
    if (memcmp(pars_a->scaling_points_y, pars_b->scaling_points_y, sizeof(pars_b->scaling_points_y))) {
767
0
        return 0;
768
0
    }
769
770
0
    if (memcmp(pars_a->scaling_points_cb, pars_b->scaling_points_cb, sizeof(pars_b->scaling_points_cb))) {
771
0
        return 0;
772
0
    }
773
774
0
    if (memcmp(pars_a->scaling_points_cr, pars_b->scaling_points_cr, sizeof(pars_b->scaling_points_cr))) {
775
0
        return 0;
776
0
    }
777
778
0
    if (memcmp(pars_a->ar_coeffs_y, pars_b->ar_coeffs_y, sizeof(pars_b->ar_coeffs_y))) {
779
0
        return 0;
780
0
    }
781
782
0
    if (memcmp(pars_a->ar_coeffs_cb, pars_b->ar_coeffs_cb, sizeof(pars_b->ar_coeffs_cb))) {
783
0
        return 0;
784
0
    }
785
786
0
    if (memcmp(pars_a->ar_coeffs_cr, pars_b->ar_coeffs_cr, sizeof(pars_b->ar_coeffs_cr))) {
787
0
        return 0;
788
0
    }
789
790
0
    return 1;
791
0
}
792
793
/*
 * Copies a rectangle of samples row by row from src to dst.
 *
 * width, src_stride and dst_stride are expressed in samples; when
 * use_high_bit_depth is non-zero every sample occupies two bytes, so the
 * byte count per row and the byte step between rows are doubled.
 */
void svt_aom_fgn_copy_rect(uint8_t* src, int32_t src_stride, uint8_t* dst, int32_t dst_stride, int32_t width,
                           int32_t height, int32_t use_high_bit_depth) {
    const int32_t bytes_per_sample = use_high_bit_depth ? 2 : 1;

    for (; height; --height) {
        svt_memcpy(dst, src, width * sizeof(uint8_t) * bytes_per_sample);
        src += src_stride * bytes_per_sample;
        dst += dst_stride * bytes_per_sample;
    }
}
804
805
static void copy_area(int32_t* src, int32_t src_stride, int32_t* dst, int32_t dst_stride, int32_t width,
806
0
                      int32_t height) {
807
0
    while (height) {
808
0
        if (svt_memcpy != NULL) {
809
0
            svt_memcpy(dst, src, width * sizeof(*src));
810
0
        } else {
811
0
            svt_memcpy_c(dst, src, width * sizeof(*src));
812
0
        }
813
0
        src += src_stride;
814
0
        dst += dst_stride;
815
0
        --height;
816
0
    }
817
0
    return;
818
0
}
819
820
static void ver_boundary_overlap(int32_t* left_block, int32_t left_stride, int32_t* right_block, int32_t right_stride,
821
0
                                 int32_t* dst_block, int32_t dst_stride, int32_t width, int32_t height) {
822
0
    if (width == 1) {
823
0
        while (height) {
824
0
            *dst_block = clamp((*left_block * 23 + *right_block * 22 + 16) >> 5, grain_min, grain_max);
825
0
            left_block += left_stride;
826
0
            right_block += right_stride;
827
0
            dst_block += dst_stride;
828
0
            --height;
829
0
        }
830
0
        return;
831
0
    } else if (width == 2) {
832
0
        while (height) {
833
0
            dst_block[0] = clamp((27 * left_block[0] + 17 * right_block[0] + 16) >> 5, grain_min, grain_max);
834
0
            dst_block[1] = clamp((17 * left_block[1] + 27 * right_block[1] + 16) >> 5, grain_min, grain_max);
835
0
            left_block += left_stride;
836
0
            right_block += right_stride;
837
0
            dst_block += dst_stride;
838
0
            --height;
839
0
        }
840
0
        return;
841
0
    }
842
0
}
843
844
static void hor_boundary_overlap(int32_t* top_block, int32_t top_stride, int32_t* bottom_block, int32_t bottom_stride,
845
0
                                 int32_t* dst_block, int32_t dst_stride, int32_t width, int32_t height) {
846
0
    if (height == 1) {
847
0
        while (width) {
848
0
            *dst_block = clamp((*top_block * 23 + *bottom_block * 22 + 16) >> 5, grain_min, grain_max);
849
0
            ++top_block;
850
0
            ++bottom_block;
851
0
            ++dst_block;
852
0
            --width;
853
0
        }
854
0
        return;
855
0
    } else if (height == 2) {
856
0
        while (width) {
857
0
            dst_block[0]          = clamp((27 * top_block[0] + 17 * bottom_block[0] + 16) >> 5, grain_min, grain_max);
858
0
            dst_block[dst_stride] = clamp(
859
0
                (17 * top_block[top_stride] + 27 * bottom_block[bottom_stride] + 16) >> 5, grain_min, grain_max);
860
0
            ++top_block;
861
0
            ++bottom_block;
862
0
            ++dst_block;
863
0
            --width;
864
0
        }
865
0
        return;
866
0
    }
867
0
}
868
869
void svt_av1_add_film_grain_run(const AomFilmGrain* params, uint8_t* luma, uint8_t* cb, uint8_t* cr, int32_t height,
870
                                int32_t width, int32_t luma_stride, int32_t chroma_stride, int32_t use_high_bit_depth,
871
0
                                int32_t chroma_subsamp_y, int32_t chroma_subsamp_x) {
872
0
    int32_t** pred_pos_luma;
873
0
    int32_t** pred_pos_chroma;
874
0
    int32_t*  luma_grain_block;
875
0
    int32_t*  cb_grain_block;
876
0
    int32_t*  cr_grain_block;
877
878
0
    int32_t* y_line_buf;
879
0
    int32_t* cb_line_buf;
880
0
    int32_t* cr_line_buf;
881
882
0
    int32_t* y_col_buf;
883
0
    int32_t* cb_col_buf;
884
0
    int32_t* cr_col_buf;
885
886
0
    random_register = params->random_seed;
887
888
0
    int32_t left_pad   = 3;
889
0
    int32_t right_pad  = 3; // padding to offset for AR coefficients
890
0
    int32_t top_pad    = 3;
891
0
    int32_t bottom_pad = 0;
892
893
0
    int32_t ar_padding = 3; // maximum lag used for stabilization of AR coefficients
894
895
0
    luma_subblock_size_y = 32;
896
0
    luma_subblock_size_x = 32;
897
898
0
    chroma_subblock_size_y = luma_subblock_size_y >> chroma_subsamp_y;
899
0
    chroma_subblock_size_x = luma_subblock_size_x >> chroma_subsamp_x;
900
901
    // Initial padding is only needed for generation of
902
    // film grain templates (to stabilize the AR process)
903
    // Only a 64x64 luma and 32x32 chroma part of a template
904
    // is used later for adding grain, padding can be discarded
905
906
0
    int32_t luma_block_size_y = top_pad + 2 * ar_padding + luma_subblock_size_y * 2 + bottom_pad;
907
0
    int32_t luma_block_size_x = left_pad + 2 * ar_padding + luma_subblock_size_x * 2 + 2 * ar_padding + right_pad;
908
909
0
    int32_t chroma_block_size_y = top_pad + (2 >> chroma_subsamp_y) * ar_padding + chroma_subblock_size_y * 2 +
910
0
        bottom_pad;
911
0
    int32_t chroma_block_size_x = left_pad + (2 >> chroma_subsamp_x) * ar_padding + chroma_subblock_size_x * 2 +
912
0
        (2 >> chroma_subsamp_x) * ar_padding + right_pad;
913
914
0
    int32_t luma_grain_stride   = luma_block_size_x;
915
0
    int32_t chroma_grain_stride = chroma_block_size_x;
916
917
0
    int32_t overlap   = params->overlap_flag;
918
0
    int32_t bit_depth = params->bit_depth;
919
920
0
    grain_center = 128 << (bit_depth - 8);
921
0
    grain_min    = 0 - grain_center;
922
0
    grain_max    = (256 << (bit_depth - 8)) - 1 - grain_center;
923
924
0
    init_arrays(params,
925
0
                luma_stride,
926
0
                chroma_stride,
927
0
                &pred_pos_luma,
928
0
                &pred_pos_chroma,
929
0
                &luma_grain_block,
930
0
                &cb_grain_block,
931
0
                &cr_grain_block,
932
0
                &y_line_buf,
933
0
                &cb_line_buf,
934
0
                &cr_line_buf,
935
0
                &y_col_buf,
936
0
                &cb_col_buf,
937
0
                &cr_col_buf,
938
0
                luma_block_size_y * luma_block_size_x,
939
0
                chroma_block_size_y * chroma_block_size_x,
940
0
                chroma_subsamp_y,
941
0
                chroma_subsamp_x);
942
943
0
    generate_luma_grain_block(params,
944
0
                              pred_pos_luma,
945
0
                              luma_grain_block,
946
0
                              luma_block_size_y,
947
0
                              luma_block_size_x,
948
0
                              luma_grain_stride,
949
0
                              left_pad,
950
0
                              top_pad,
951
0
                              right_pad,
952
0
                              bottom_pad);
953
954
0
    generate_chroma_grain_blocks(params,
955
                                 //                               pred_pos_luma,
956
0
                                 pred_pos_chroma,
957
0
                                 luma_grain_block,
958
0
                                 cb_grain_block,
959
0
                                 cr_grain_block,
960
0
                                 luma_grain_stride,
961
0
                                 chroma_block_size_y,
962
0
                                 chroma_block_size_x,
963
0
                                 chroma_grain_stride,
964
0
                                 left_pad,
965
0
                                 top_pad,
966
0
                                 right_pad,
967
0
                                 bottom_pad,
968
0
                                 chroma_subsamp_y,
969
0
                                 chroma_subsamp_x);
970
971
0
    init_scaling_function(params->scaling_points_y, params->num_y_points, scaling_lut_y);
972
973
0
    if (params->chroma_scaling_from_luma) {
974
0
        svt_memcpy(scaling_lut_cb, scaling_lut_y, sizeof(*scaling_lut_y) * 256);
975
0
        svt_memcpy(scaling_lut_cr, scaling_lut_y, sizeof(*scaling_lut_y) * 256);
976
0
    } else {
977
0
        init_scaling_function(params->scaling_points_cb, params->num_cb_points, scaling_lut_cb);
978
0
        init_scaling_function(params->scaling_points_cr, params->num_cr_points, scaling_lut_cr);
979
0
    }
980
0
    for (int32_t y = 0; y < height / 2; y += (luma_subblock_size_y >> 1)) {
981
0
        init_random_generator(y * 2, params->random_seed);
982
983
0
        for (int32_t x = 0; x < width / 2; x += (luma_subblock_size_x >> 1)) {
984
0
            int32_t offset_y = get_random_number(8);
985
0
            int32_t offset_x = (offset_y >> 4) & 15;
986
0
            offset_y &= 15;
987
988
0
            int32_t luma_offset_y = left_pad + 2 * ar_padding + (offset_y << 1);
989
0
            int32_t luma_offset_x = top_pad + 2 * ar_padding + (offset_x << 1);
990
991
0
            int32_t chroma_offset_y = top_pad + (2 >> chroma_subsamp_y) * ar_padding +
992
0
                offset_y * (2 >> chroma_subsamp_y);
993
0
            int32_t chroma_offset_x = left_pad + (2 >> chroma_subsamp_x) * ar_padding +
994
0
                offset_x * (2 >> chroma_subsamp_x);
995
996
0
            if (overlap && x) {
997
0
                ver_boundary_overlap(y_col_buf,
998
0
                                     2,
999
0
                                     luma_grain_block + luma_offset_y * luma_grain_stride + luma_offset_x,
1000
0
                                     luma_grain_stride,
1001
0
                                     y_col_buf,
1002
0
                                     2,
1003
0
                                     2,
1004
0
                                     AOMMIN(luma_subblock_size_y + 2, height - (y << 1)));
1005
1006
0
                ver_boundary_overlap(
1007
0
                    cb_col_buf,
1008
0
                    2 >> chroma_subsamp_x,
1009
0
                    cb_grain_block + chroma_offset_y * chroma_grain_stride + chroma_offset_x,
1010
0
                    chroma_grain_stride,
1011
0
                    cb_col_buf,
1012
0
                    2 >> chroma_subsamp_x,
1013
0
                    2 >> chroma_subsamp_x,
1014
0
                    AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y), (height - (y << 1)) >> chroma_subsamp_y));
1015
1016
0
                ver_boundary_overlap(
1017
0
                    cr_col_buf,
1018
0
                    2 >> chroma_subsamp_x,
1019
0
                    cr_grain_block + chroma_offset_y * chroma_grain_stride + chroma_offset_x,
1020
0
                    chroma_grain_stride,
1021
0
                    cr_col_buf,
1022
0
                    2 >> chroma_subsamp_x,
1023
0
                    2 >> chroma_subsamp_x,
1024
0
                    AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y), (height - (y << 1)) >> chroma_subsamp_y));
1025
1026
0
                int32_t i = y ? 1 : 0;
1027
1028
0
                if (use_high_bit_depth) {
1029
0
                    add_noise_to_block_hbd(params,
1030
0
                                           (uint16_t*)luma + ((y + i) << 1) * luma_stride + (x << 1),
1031
0
                                           (uint16_t*)cb + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1032
0
                                               (x << (1 - chroma_subsamp_x)),
1033
0
                                           (uint16_t*)cr + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1034
0
                                               (x << (1 - chroma_subsamp_x)),
1035
0
                                           luma_stride,
1036
0
                                           chroma_stride,
1037
0
                                           y_col_buf + i * 4,
1038
0
                                           cb_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x),
1039
0
                                           cr_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x),
1040
0
                                           2,
1041
0
                                           (2 - chroma_subsamp_x),
1042
0
                                           AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i,
1043
0
                                           1,
1044
0
                                           bit_depth,
1045
0
                                           chroma_subsamp_y,
1046
0
                                           chroma_subsamp_x);
1047
0
                } else {
1048
0
                    add_noise_to_block(
1049
0
                        params,
1050
0
                        luma + ((y + i) << 1) * luma_stride + (x << 1),
1051
0
                        cb + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride + (x << (1 - chroma_subsamp_x)),
1052
0
                        cr + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride + (x << (1 - chroma_subsamp_x)),
1053
0
                        luma_stride,
1054
0
                        chroma_stride,
1055
0
                        y_col_buf + i * 4,
1056
0
                        cb_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x),
1057
0
                        cr_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x),
1058
0
                        2,
1059
0
                        (2 - chroma_subsamp_x),
1060
0
                        AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i,
1061
0
                        1,
1062
0
                        bit_depth,
1063
0
                        chroma_subsamp_y,
1064
0
                        chroma_subsamp_x);
1065
0
                }
1066
0
            }
1067
1068
0
            if (overlap && y) {
1069
0
                if (x) {
1070
0
                    ASSERT(y_col_buf != NULL);
1071
0
                    hor_boundary_overlap(
1072
0
                        y_line_buf + (x << 1), luma_stride, y_col_buf, 2, y_line_buf + (x << 1), luma_stride, 2, 2);
1073
1074
0
                    hor_boundary_overlap(cb_line_buf + x * (2 >> chroma_subsamp_x),
1075
0
                                         chroma_stride,
1076
0
                                         cb_col_buf,
1077
0
                                         2 >> chroma_subsamp_x,
1078
0
                                         cb_line_buf + x * (2 >> chroma_subsamp_x),
1079
0
                                         chroma_stride,
1080
0
                                         2 >> chroma_subsamp_x,
1081
0
                                         2 >> chroma_subsamp_y);
1082
1083
0
                    hor_boundary_overlap(cr_line_buf + x * (2 >> chroma_subsamp_x),
1084
0
                                         chroma_stride,
1085
0
                                         cr_col_buf,
1086
0
                                         2 >> chroma_subsamp_x,
1087
0
                                         cr_line_buf + x * (2 >> chroma_subsamp_x),
1088
0
                                         chroma_stride,
1089
0
                                         2 >> chroma_subsamp_x,
1090
0
                                         2 >> chroma_subsamp_y);
1091
0
                }
1092
1093
0
                hor_boundary_overlap(y_line_buf + ((x ? x + 1 : 0) << 1),
1094
0
                                     luma_stride,
1095
0
                                     luma_grain_block + luma_offset_y * luma_grain_stride + luma_offset_x + (x ? 2 : 0),
1096
0
                                     luma_grain_stride,
1097
0
                                     y_line_buf + ((x ? x + 1 : 0) << 1),
1098
0
                                     luma_stride,
1099
0
                                     AOMMIN(luma_subblock_size_x - ((x ? 1 : 0) << 1), width - ((x ? x + 1 : 0) << 1)),
1100
0
                                     2);
1101
1102
0
                hor_boundary_overlap(cb_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
1103
0
                                     chroma_stride,
1104
0
                                     cb_grain_block + chroma_offset_y * chroma_grain_stride + chroma_offset_x +
1105
0
                                         ((x ? 1 : 0) << (1 - chroma_subsamp_x)),
1106
0
                                     chroma_grain_stride,
1107
0
                                     cb_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
1108
0
                                     chroma_stride,
1109
0
                                     AOMMIN(chroma_subblock_size_x - ((x ? 1 : 0) << (1 - chroma_subsamp_x)),
1110
0
                                            (width - ((x ? x + 1 : 0) << 1)) >> chroma_subsamp_x),
1111
0
                                     2 >> chroma_subsamp_y);
1112
1113
0
                hor_boundary_overlap(cr_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
1114
0
                                     chroma_stride,
1115
0
                                     cr_grain_block + chroma_offset_y * chroma_grain_stride + chroma_offset_x +
1116
0
                                         ((x ? 1 : 0) << (1 - chroma_subsamp_x)),
1117
0
                                     chroma_grain_stride,
1118
0
                                     cr_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
1119
0
                                     chroma_stride,
1120
0
                                     AOMMIN(chroma_subblock_size_x - ((x ? 1 : 0) << (1 - chroma_subsamp_x)),
1121
0
                                            (width - ((x ? x + 1 : 0) << 1)) >> chroma_subsamp_x),
1122
0
                                     2 >> chroma_subsamp_y);
1123
1124
0
                if (use_high_bit_depth) {
1125
0
                    add_noise_to_block_hbd(
1126
0
                        params,
1127
0
                        (uint16_t*)luma + (y << 1) * luma_stride + (x << 1),
1128
0
                        (uint16_t*)cb + (y << (1 - chroma_subsamp_y)) * chroma_stride + (x << ((1 - chroma_subsamp_x))),
1129
0
                        (uint16_t*)cr + (y << (1 - chroma_subsamp_y)) * chroma_stride + (x << ((1 - chroma_subsamp_x))),
1130
0
                        luma_stride,
1131
0
                        chroma_stride,
1132
0
                        y_line_buf + (x << 1),
1133
0
                        cb_line_buf + (x << (1 - chroma_subsamp_x)),
1134
0
                        cr_line_buf + (x << (1 - chroma_subsamp_x)),
1135
0
                        luma_stride,
1136
0
                        chroma_stride,
1137
0
                        1,
1138
0
                        AOMMIN(luma_subblock_size_x >> 1, width / 2 - x),
1139
0
                        bit_depth,
1140
0
                        chroma_subsamp_y,
1141
0
                        chroma_subsamp_x);
1142
0
                } else {
1143
0
                    add_noise_to_block(
1144
0
                        params,
1145
0
                        luma + (y << 1) * luma_stride + (x << 1),
1146
0
                        cb + (y << (1 - chroma_subsamp_y)) * chroma_stride + (x << ((1 - chroma_subsamp_x))),
1147
0
                        cr + (y << (1 - chroma_subsamp_y)) * chroma_stride + (x << ((1 - chroma_subsamp_x))),
1148
0
                        luma_stride,
1149
0
                        chroma_stride,
1150
0
                        y_line_buf + (x << 1),
1151
0
                        cb_line_buf + (x << (1 - chroma_subsamp_x)),
1152
0
                        cr_line_buf + (x << (1 - chroma_subsamp_x)),
1153
0
                        luma_stride,
1154
0
                        chroma_stride,
1155
0
                        1,
1156
0
                        AOMMIN(luma_subblock_size_x >> 1, width / 2 - x),
1157
0
                        bit_depth,
1158
0
                        chroma_subsamp_y,
1159
0
                        chroma_subsamp_x);
1160
0
                }
1161
0
            }
1162
1163
0
            int32_t i = overlap && y ? 1 : 0;
1164
0
            int32_t j = overlap && x ? 1 : 0;
1165
1166
0
            if (use_high_bit_depth) {
1167
0
                add_noise_to_block_hbd(
1168
0
                    params,
1169
0
                    (uint16_t*)luma + ((y + i) << 1) * luma_stride + ((x + j) << 1),
1170
0
                    (uint16_t*)cb + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1171
0
                        ((x + j) << (1 - chroma_subsamp_x)),
1172
0
                    (uint16_t*)cr + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1173
0
                        ((x + j) << (1 - chroma_subsamp_x)),
1174
0
                    luma_stride,
1175
0
                    chroma_stride,
1176
0
                    luma_grain_block + (luma_offset_y + (i << 1)) * luma_grain_stride + luma_offset_x + (j << 1),
1177
0
                    cb_grain_block + (chroma_offset_y + (i << (1 - chroma_subsamp_y))) * chroma_grain_stride +
1178
0
                        chroma_offset_x + (j << (1 - chroma_subsamp_x)),
1179
0
                    cr_grain_block + (chroma_offset_y + (i << (1 - chroma_subsamp_y))) * chroma_grain_stride +
1180
0
                        chroma_offset_x + (j << (1 - chroma_subsamp_x)),
1181
0
                    luma_grain_stride,
1182
0
                    chroma_grain_stride,
1183
0
                    AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i,
1184
0
                    AOMMIN(luma_subblock_size_x >> 1, width / 2 - x) - j,
1185
0
                    bit_depth,
1186
0
                    chroma_subsamp_y,
1187
0
                    chroma_subsamp_x);
1188
0
            } else {
1189
0
                add_noise_to_block(
1190
0
                    params,
1191
0
                    luma + ((y + i) << 1) * luma_stride + ((x + j) << 1),
1192
0
                    cb + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride + ((x + j) << (1 - chroma_subsamp_x)),
1193
0
                    cr + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride + ((x + j) << (1 - chroma_subsamp_x)),
1194
0
                    luma_stride,
1195
0
                    chroma_stride,
1196
0
                    luma_grain_block + (luma_offset_y + (i << 1)) * luma_grain_stride + luma_offset_x + (j << 1),
1197
0
                    cb_grain_block + (chroma_offset_y + (i << (1 - chroma_subsamp_y))) * chroma_grain_stride +
1198
0
                        chroma_offset_x + (j << (1 - chroma_subsamp_x)),
1199
0
                    cr_grain_block + (chroma_offset_y + (i << (1 - chroma_subsamp_y))) * chroma_grain_stride +
1200
0
                        chroma_offset_x + (j << (1 - chroma_subsamp_x)),
1201
0
                    luma_grain_stride,
1202
0
                    chroma_grain_stride,
1203
0
                    AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i,
1204
0
                    AOMMIN(luma_subblock_size_x >> 1, width / 2 - x) - j,
1205
0
                    bit_depth,
1206
0
                    chroma_subsamp_y,
1207
0
                    chroma_subsamp_x);
1208
0
            }
1209
1210
0
            if (overlap) {
1211
0
                if (x) {
1212
                    // Copy overlapped column buffer to line buffer
1213
0
                    copy_area(y_col_buf + (luma_subblock_size_y << 1), 2, y_line_buf + (x << 1), luma_stride, 2, 2);
1214
1215
0
                    copy_area(cb_col_buf + (chroma_subblock_size_y << (1 - chroma_subsamp_x)),
1216
0
                              2 >> chroma_subsamp_x,
1217
0
                              cb_line_buf + (x << (1 - chroma_subsamp_x)),
1218
0
                              chroma_stride,
1219
0
                              2 >> chroma_subsamp_x,
1220
0
                              2 >> chroma_subsamp_y);
1221
1222
0
                    copy_area(cr_col_buf + (chroma_subblock_size_y << (1 - chroma_subsamp_x)),
1223
0
                              2 >> chroma_subsamp_x,
1224
0
                              cr_line_buf + (x << (1 - chroma_subsamp_x)),
1225
0
                              chroma_stride,
1226
0
                              2 >> chroma_subsamp_x,
1227
0
                              2 >> chroma_subsamp_y);
1228
0
                }
1229
1230
                // Copy grain to the line buffer for overlap with a bottom block
1231
0
                copy_area(luma_grain_block + (luma_offset_y + luma_subblock_size_y) * luma_grain_stride +
1232
0
                              luma_offset_x + ((x ? 2 : 0)),
1233
0
                          luma_grain_stride,
1234
0
                          y_line_buf + ((x ? x + 1 : 0) << 1),
1235
0
                          luma_stride,
1236
0
                          AOMMIN(luma_subblock_size_x, width - (x << 1)) - (x ? 2 : 0),
1237
0
                          2);
1238
1239
0
                copy_area(cb_grain_block + (chroma_offset_y + chroma_subblock_size_y) * chroma_grain_stride +
1240
0
                              chroma_offset_x + (x ? 2 >> chroma_subsamp_x : 0),
1241
0
                          chroma_grain_stride,
1242
0
                          cb_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
1243
0
                          chroma_stride,
1244
0
                          AOMMIN(chroma_subblock_size_x, ((width - (x << 1)) >> chroma_subsamp_x)) -
1245
0
                              (x ? 2 >> chroma_subsamp_x : 0),
1246
0
                          2 >> chroma_subsamp_y);
1247
1248
0
                copy_area(cr_grain_block + (chroma_offset_y + chroma_subblock_size_y) * chroma_grain_stride +
1249
0
                              chroma_offset_x + (x ? 2 >> chroma_subsamp_x : 0),
1250
0
                          chroma_grain_stride,
1251
0
                          cr_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
1252
0
                          chroma_stride,
1253
0
                          AOMMIN(chroma_subblock_size_x, ((width - (x << 1)) >> chroma_subsamp_x)) -
1254
0
                              (x ? 2 >> chroma_subsamp_x : 0),
1255
0
                          2 >> chroma_subsamp_y);
1256
1257
                // Copy grain to the column buffer for overlap with the next block to
1258
                // the right
1259
1260
0
                copy_area(luma_grain_block + luma_offset_y * luma_grain_stride + luma_offset_x + luma_subblock_size_x,
1261
0
                          luma_grain_stride,
1262
0
                          y_col_buf,
1263
0
                          2,
1264
0
                          2,
1265
0
                          AOMMIN(luma_subblock_size_y + 2, height - (y << 1)));
1266
1267
0
                copy_area(
1268
0
                    cb_grain_block + chroma_offset_y * chroma_grain_stride + chroma_offset_x + chroma_subblock_size_x,
1269
0
                    chroma_grain_stride,
1270
0
                    cb_col_buf,
1271
0
                    2 >> chroma_subsamp_x,
1272
0
                    2 >> chroma_subsamp_x,
1273
0
                    AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y), (height - (y << 1)) >> chroma_subsamp_y));
1274
1275
0
                copy_area(
1276
0
                    cr_grain_block + chroma_offset_y * chroma_grain_stride + chroma_offset_x + chroma_subblock_size_x,
1277
0
                    chroma_grain_stride,
1278
0
                    cr_col_buf,
1279
0
                    2 >> chroma_subsamp_x,
1280
0
                    2 >> chroma_subsamp_x,
1281
0
                    AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y), (height - (y << 1)) >> chroma_subsamp_y));
1282
0
            }
1283
0
        }
1284
0
    }
1285
1286
0
    dealloc_arrays(params,
1287
0
                   &pred_pos_luma,
1288
0
                   &pred_pos_chroma,
1289
0
                   &luma_grain_block,
1290
0
                   &cb_grain_block,
1291
0
                   &cr_grain_block,
1292
0
                   &y_line_buf,
1293
0
                   &cb_line_buf,
1294
0
                   &cr_line_buf,
1295
0
                   &y_col_buf,
1296
0
                   &cb_col_buf,
1297
0
                   &cr_col_buf);
1298
0
}