/src/x265/source/common/lowres.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /***************************************************************************** |
2 | | * Copyright (C) 2013-2020 MulticoreWare, Inc |
3 | | * |
4 | | * Authors: Gopu Govindaswamy <gopu@multicorewareinc.com> |
5 | | * Ashok Kumar Mishra <ashok@multicorewareinc.com> |
6 | | * |
7 | | * This program is free software; you can redistribute it and/or modify |
8 | | * it under the terms of the GNU General Public License as published by |
9 | | * the Free Software Foundation; either version 2 of the License, or |
10 | | * (at your option) any later version. |
11 | | * |
12 | | * This program is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | | * GNU General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU General Public License |
18 | | * along with this program; if not, write to the Free Software |
19 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. |
20 | | * |
21 | | * This program is also available under a commercial proprietary license. |
22 | | * For more information, contact us at license @ x265.com. |
23 | | *****************************************************************************/ |
24 | | |
25 | | #include "picyuv.h" |
26 | | #include "lowres.h" |
27 | | #include "mv.h" |
28 | | |
29 | | using namespace X265_NS; |
30 | | |
31 | | bool PicQPAdaptationLayer::create(uint32_t width, uint32_t height, uint32_t partWidth, uint32_t partHeight, uint32_t numAQPartInWidthExt, uint32_t numAQPartInHeightExt) |
32 | 0 | { |
33 | 0 | aqPartWidth = partWidth; |
34 | 0 | aqPartHeight = partHeight; |
35 | 0 | numAQPartInWidth = (width + partWidth - 1) / partWidth; |
36 | 0 | numAQPartInHeight = (height + partHeight - 1) / partHeight; |
37 | |
|
38 | 0 | CHECKED_MALLOC_ZERO(dActivity, double, numAQPartInWidthExt * numAQPartInHeightExt); |
39 | 0 | CHECKED_MALLOC_ZERO(dQpOffset, double, numAQPartInWidthExt * numAQPartInHeightExt); |
40 | 0 | CHECKED_MALLOC_ZERO(dCuTreeOffset, double, numAQPartInWidthExt * numAQPartInHeightExt); |
41 | |
|
42 | 0 | if (bQpSize) |
43 | 0 | CHECKED_MALLOC_ZERO(dCuTreeOffset8x8, double, numAQPartInWidthExt * numAQPartInHeightExt); |
44 | |
|
45 | 0 | return true; |
46 | 0 | fail: |
47 | 0 | return false; |
48 | 0 | } |
49 | | |
50 | | bool Lowres::create(x265_param* param, PicYuv *origPic, uint32_t qgSize) |
51 | 0 | { |
52 | 0 | isLowres = true; |
53 | 0 | bframes = param->bframes; |
54 | 0 | widthFullRes = origPic->m_picWidth; |
55 | 0 | heightFullRes = origPic->m_picHeight; |
56 | 0 | width = origPic->m_picWidth / 2; |
57 | 0 | lines = origPic->m_picHeight / 2; |
58 | 0 | bEnableHME = param->bEnableHME ? 1 : 0; |
59 | 0 | lumaStride = width + 2 * origPic->m_lumaMarginX; |
60 | 0 | if (lumaStride & 31) |
61 | 0 | lumaStride += 32 - (lumaStride & 31); |
62 | 0 | maxBlocksInRow = (width + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS; |
63 | 0 | maxBlocksInCol = (lines + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS; |
64 | 0 | maxBlocksInRowFullRes = maxBlocksInRow * 2; |
65 | 0 | maxBlocksInColFullRes = maxBlocksInCol * 2; |
66 | 0 | int cuCount = maxBlocksInRow * maxBlocksInCol; |
67 | 0 | int cuCountFullRes = (qgSize > 8) ? cuCount : cuCount << 2; |
68 | 0 | isHMELowres = param->bEnableHME ? 1 : 0; |
69 | | |
70 | | /* rounding the width to multiple of lowres CU size */ |
71 | 0 | width = maxBlocksInRow * X265_LOWRES_CU_SIZE; |
72 | 0 | lines = maxBlocksInCol * X265_LOWRES_CU_SIZE; |
73 | |
|
74 | 0 | size_t planesize = lumaStride * (lines + 2 * origPic->m_lumaMarginY); |
75 | 0 | size_t padoffset = lumaStride * origPic->m_lumaMarginY + origPic->m_lumaMarginX; |
76 | 0 | if (!!param->rc.aqMode || !!param->rc.hevcAq || !!param->bAQMotion) |
77 | 0 | { |
78 | 0 | CHECKED_MALLOC_ZERO(qpAqOffset, double, cuCountFullRes); |
79 | 0 | CHECKED_MALLOC_ZERO(invQscaleFactor, int, cuCountFullRes); |
80 | 0 | CHECKED_MALLOC_ZERO(qpCuTreeOffset, double, cuCountFullRes); |
81 | 0 | if (qgSize == 8) |
82 | 0 | CHECKED_MALLOC_ZERO(invQscaleFactor8x8, int, cuCount); |
83 | 0 | CHECKED_MALLOC_ZERO(edgeInclined, int, cuCountFullRes); |
84 | 0 | } |
85 | | |
86 | 0 | if (origPic->m_param->bAQMotion) |
87 | 0 | CHECKED_MALLOC_ZERO(qpAqMotionOffset, double, cuCountFullRes); |
88 | 0 | if (origPic->m_param->bDynamicRefine || origPic->m_param->bEnableFades) |
89 | 0 | CHECKED_MALLOC_ZERO(blockVariance, uint32_t, cuCountFullRes); |
90 | |
|
91 | 0 | if (!!param->rc.hevcAq) |
92 | 0 | { |
93 | 0 | m_maxCUSize = param->maxCUSize; |
94 | 0 | m_qgSize = qgSize; |
95 | |
|
96 | 0 | uint32_t partWidth, partHeight, nAQPartInWidth, nAQPartInHeight; |
97 | |
|
98 | 0 | pAQLayer = new PicQPAdaptationLayer[4]; |
99 | 0 | maxAQDepth = 0; |
100 | 0 | for (uint32_t d = 0; d < 4; d++) |
101 | 0 | { |
102 | 0 | int ctuSizeIdx = 6 - g_log2Size[param->maxCUSize]; |
103 | 0 | int aqDepth = g_log2Size[param->maxCUSize] - g_log2Size[qgSize]; |
104 | 0 | if (!aqLayerDepth[ctuSizeIdx][aqDepth][d]) |
105 | 0 | continue; |
106 | | |
107 | 0 | pAQLayer->minAQDepth = d; |
108 | 0 | partWidth = param->maxCUSize >> d; |
109 | 0 | partHeight = param->maxCUSize >> d; |
110 | |
|
111 | 0 | if (minAQSize[ctuSizeIdx] == d) |
112 | 0 | { |
113 | 0 | pAQLayer[d].bQpSize = true; |
114 | 0 | nAQPartInWidth = maxBlocksInRow * 2; |
115 | 0 | nAQPartInHeight = maxBlocksInCol * 2; |
116 | 0 | } |
117 | 0 | else |
118 | 0 | { |
119 | 0 | pAQLayer[d].bQpSize = false; |
120 | 0 | nAQPartInWidth = (origPic->m_picWidth + partWidth - 1) / partWidth; |
121 | 0 | nAQPartInHeight = (origPic->m_picHeight + partHeight - 1) / partHeight; |
122 | 0 | } |
123 | |
|
124 | 0 | maxAQDepth++; |
125 | |
|
126 | 0 | pAQLayer[d].create(origPic->m_picWidth, origPic->m_picHeight, partWidth, partHeight, nAQPartInWidth, nAQPartInHeight); |
127 | 0 | } |
128 | 0 | } |
129 | 0 | CHECKED_MALLOC(propagateCost, uint16_t, cuCount); |
130 | | |
131 | | /* allocate lowres buffers */ |
132 | 0 | CHECKED_MALLOC_ZERO(buffer[0], pixel, 4 * planesize); |
133 | |
|
134 | 0 | buffer[1] = buffer[0] + planesize; |
135 | 0 | buffer[2] = buffer[1] + planesize; |
136 | 0 | buffer[3] = buffer[2] + planesize; |
137 | |
|
138 | 0 | lowresPlane[0] = buffer[0] + padoffset; |
139 | 0 | lowresPlane[1] = buffer[1] + padoffset; |
140 | 0 | lowresPlane[2] = buffer[2] + padoffset; |
141 | 0 | lowresPlane[3] = buffer[3] + padoffset; |
142 | |
|
143 | 0 | if (bEnableHME) |
144 | 0 | { |
145 | 0 | intptr_t lumaStrideHalf = lumaStride / 2; |
146 | 0 | if (lumaStrideHalf & 31) |
147 | 0 | lumaStrideHalf += 32 - (lumaStrideHalf & 31); |
148 | 0 | size_t planesizeHalf = planesize / 2; |
149 | 0 | size_t padoffsetHalf = padoffset / 2; |
150 | | /* allocate lower-res buffers */ |
151 | 0 | CHECKED_MALLOC_ZERO(lowerResBuffer[0], pixel, 4 * planesizeHalf); |
152 | |
|
153 | 0 | lowerResBuffer[1] = lowerResBuffer[0] + planesizeHalf; |
154 | 0 | lowerResBuffer[2] = lowerResBuffer[1] + planesizeHalf; |
155 | 0 | lowerResBuffer[3] = lowerResBuffer[2] + planesizeHalf; |
156 | |
|
157 | 0 | lowerResPlane[0] = lowerResBuffer[0] + padoffsetHalf; |
158 | 0 | lowerResPlane[1] = lowerResBuffer[1] + padoffsetHalf; |
159 | 0 | lowerResPlane[2] = lowerResBuffer[2] + padoffsetHalf; |
160 | 0 | lowerResPlane[3] = lowerResBuffer[3] + padoffsetHalf; |
161 | 0 | } |
162 | | |
163 | 0 | CHECKED_MALLOC(intraCost, int32_t, cuCount); |
164 | 0 | CHECKED_MALLOC(intraMode, uint8_t, cuCount); |
165 | |
|
166 | 0 | for (int i = 0; i < bframes + 2; i++) |
167 | 0 | { |
168 | 0 | for (int j = 0; j < bframes + 2; j++) |
169 | 0 | { |
170 | 0 | CHECKED_MALLOC(rowSatds[i][j], int32_t, maxBlocksInCol); |
171 | 0 | CHECKED_MALLOC(lowresCosts[i][j], uint16_t, cuCount); |
172 | 0 | } |
173 | 0 | } |
174 | | |
175 | 0 | for (int i = 0; i < bframes + 2; i++) |
176 | 0 | { |
177 | 0 | CHECKED_MALLOC(lowresMvs[0][i], MV, cuCount); |
178 | 0 | CHECKED_MALLOC(lowresMvs[1][i], MV, cuCount); |
179 | 0 | CHECKED_MALLOC(lowresMvCosts[0][i], int32_t, cuCount); |
180 | 0 | CHECKED_MALLOC(lowresMvCosts[1][i], int32_t, cuCount); |
181 | 0 | if (bEnableHME) |
182 | 0 | { |
183 | 0 | int maxBlocksInRowLowerRes = ((width/2) + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS; |
184 | 0 | int maxBlocksInColLowerRes = ((lines/2) + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS; |
185 | 0 | int cuCountLowerRes = maxBlocksInRowLowerRes * maxBlocksInColLowerRes; |
186 | 0 | CHECKED_MALLOC(lowerResMvs[0][i], MV, cuCountLowerRes); |
187 | 0 | CHECKED_MALLOC(lowerResMvs[1][i], MV, cuCountLowerRes); |
188 | 0 | CHECKED_MALLOC(lowerResMvCosts[0][i], int32_t, cuCountLowerRes); |
189 | 0 | CHECKED_MALLOC(lowerResMvCosts[1][i], int32_t, cuCountLowerRes); |
190 | 0 | } |
191 | 0 | } |
192 | | |
193 | 0 | return true; |
194 | | |
195 | 0 | fail: |
196 | 0 | return false; |
197 | 0 | } |
198 | | |
199 | | void Lowres::destroy() |
200 | 0 | { |
201 | 0 | X265_FREE(buffer[0]); |
202 | 0 | if(bEnableHME) |
203 | 0 | X265_FREE(lowerResBuffer[0]); |
204 | 0 | X265_FREE(intraCost); |
205 | 0 | X265_FREE(intraMode); |
206 | |
|
207 | 0 | for (int i = 0; i < bframes + 2; i++) |
208 | 0 | { |
209 | 0 | for (int j = 0; j < bframes + 2; j++) |
210 | 0 | { |
211 | 0 | X265_FREE(rowSatds[i][j]); |
212 | 0 | X265_FREE(lowresCosts[i][j]); |
213 | 0 | } |
214 | 0 | } |
215 | |
|
216 | 0 | for (int i = 0; i < bframes + 2; i++) |
217 | 0 | { |
218 | 0 | X265_FREE(lowresMvs[0][i]); |
219 | 0 | X265_FREE(lowresMvs[1][i]); |
220 | 0 | X265_FREE(lowresMvCosts[0][i]); |
221 | 0 | X265_FREE(lowresMvCosts[1][i]); |
222 | 0 | if (bEnableHME) |
223 | 0 | { |
224 | 0 | X265_FREE(lowerResMvs[0][i]); |
225 | 0 | X265_FREE(lowerResMvs[1][i]); |
226 | 0 | X265_FREE(lowerResMvCosts[0][i]); |
227 | 0 | X265_FREE(lowerResMvCosts[1][i]); |
228 | 0 | } |
229 | 0 | } |
230 | 0 | X265_FREE(qpAqOffset); |
231 | 0 | X265_FREE(invQscaleFactor); |
232 | 0 | X265_FREE(qpCuTreeOffset); |
233 | 0 | X265_FREE(propagateCost); |
234 | 0 | X265_FREE(invQscaleFactor8x8); |
235 | 0 | X265_FREE(edgeInclined); |
236 | 0 | X265_FREE(qpAqMotionOffset); |
237 | 0 | X265_FREE(blockVariance); |
238 | 0 | if (maxAQDepth > 0) |
239 | 0 | { |
240 | 0 | for (uint32_t d = 0; d < 4; d++) |
241 | 0 | { |
242 | 0 | int ctuSizeIdx = 6 - g_log2Size[m_maxCUSize]; |
243 | 0 | int aqDepth = g_log2Size[m_maxCUSize] - g_log2Size[m_qgSize]; |
244 | 0 | if (!aqLayerDepth[ctuSizeIdx][aqDepth][d]) |
245 | 0 | continue; |
246 | | |
247 | 0 | X265_FREE(pAQLayer[d].dActivity); |
248 | 0 | X265_FREE(pAQLayer[d].dQpOffset); |
249 | 0 | X265_FREE(pAQLayer[d].dCuTreeOffset); |
250 | |
|
251 | 0 | if (pAQLayer[d].bQpSize == true) |
252 | 0 | X265_FREE(pAQLayer[d].dCuTreeOffset8x8); |
253 | 0 | } |
254 | |
|
255 | 0 | delete[] pAQLayer; |
256 | 0 | } |
257 | 0 | } |
258 | | // (re) initialize lowres state |
259 | | void Lowres::init(PicYuv *origPic, int poc) |
260 | 0 | { |
261 | 0 | bLastMiniGopBFrame = false; |
262 | 0 | bKeyframe = false; // Not a keyframe unless identified by lookahead |
263 | 0 | bIsFadeEnd = false; |
264 | 0 | frameNum = poc; |
265 | 0 | leadingBframes = 0; |
266 | 0 | indB = 0; |
267 | 0 | memset(costEst, -1, sizeof(costEst)); |
268 | 0 | memset(weightedCostDelta, 0, sizeof(weightedCostDelta)); |
269 | |
|
270 | 0 | if (qpAqOffset && invQscaleFactor) |
271 | 0 | memset(costEstAq, -1, sizeof(costEstAq)); |
272 | |
|
273 | 0 | for (int y = 0; y < bframes + 2; y++) |
274 | 0 | for (int x = 0; x < bframes + 2; x++) |
275 | 0 | rowSatds[y][x][0] = -1; |
276 | |
|
277 | 0 | for (int i = 0; i < bframes + 2; i++) |
278 | 0 | { |
279 | 0 | lowresMvs[0][i][0].x = 0x7FFF; |
280 | 0 | lowresMvs[1][i][0].x = 0x7FFF; |
281 | 0 | } |
282 | |
|
283 | 0 | for (int i = 0; i < bframes + 2; i++) |
284 | 0 | intraMbs[i] = 0; |
285 | 0 | if (origPic->m_param->rc.vbvBufferSize) |
286 | 0 | for (int i = 0; i < X265_LOOKAHEAD_MAX + 1; i++) |
287 | 0 | plannedType[i] = X265_TYPE_AUTO; |
288 | | |
289 | | /* downscale and generate 4 hpel planes for lookahead */ |
290 | 0 | primitives.frameInitLowres(origPic->m_picOrg[0], |
291 | 0 | lowresPlane[0], lowresPlane[1], lowresPlane[2], lowresPlane[3], |
292 | 0 | origPic->m_stride, lumaStride, width, lines); |
293 | | |
294 | | /* extend hpel planes for motion search */ |
295 | 0 | extendPicBorder(lowresPlane[0], lumaStride, width, lines, origPic->m_lumaMarginX, origPic->m_lumaMarginY); |
296 | 0 | extendPicBorder(lowresPlane[1], lumaStride, width, lines, origPic->m_lumaMarginX, origPic->m_lumaMarginY); |
297 | 0 | extendPicBorder(lowresPlane[2], lumaStride, width, lines, origPic->m_lumaMarginX, origPic->m_lumaMarginY); |
298 | 0 | extendPicBorder(lowresPlane[3], lumaStride, width, lines, origPic->m_lumaMarginX, origPic->m_lumaMarginY); |
299 | | |
300 | 0 | if (origPic->m_param->bEnableHME) |
301 | 0 | { |
302 | 0 | primitives.frameInitLowerRes(lowresPlane[0], |
303 | 0 | lowerResPlane[0], lowerResPlane[1], lowerResPlane[2], lowerResPlane[3], |
304 | 0 | lumaStride, lumaStride/2, (width / 2), (lines / 2)); |
305 | 0 | extendPicBorder(lowerResPlane[0], lumaStride/2, width/2, lines/2, origPic->m_lumaMarginX/2, origPic->m_lumaMarginY/2); |
306 | 0 | extendPicBorder(lowerResPlane[1], lumaStride/2, width/2, lines/2, origPic->m_lumaMarginX/2, origPic->m_lumaMarginY/2); |
307 | 0 | extendPicBorder(lowerResPlane[2], lumaStride/2, width/2, lines/2, origPic->m_lumaMarginX/2, origPic->m_lumaMarginY/2); |
308 | 0 | extendPicBorder(lowerResPlane[3], lumaStride/2, width/2, lines/2, origPic->m_lumaMarginX/2, origPic->m_lumaMarginY/2); |
309 | 0 | fpelLowerResPlane[0] = lowerResPlane[0]; |
310 | 0 | } |
311 | |
|
312 | 0 | fpelPlane[0] = lowresPlane[0]; |
313 | 0 | } |