/src/x265/source/encoder/weightPrediction.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /***************************************************************************** |
2 | | * Copyright (C) 2013-2020 MulticoreWare, Inc |
3 | | * |
4 | | * Author: Shazeb Nawaz Khan <shazeb@multicorewareinc.com> |
5 | | * Steve Borho <steve@borho.org> |
6 | | * Kavitha Sampas <kavitha@multicorewareinc.com> |
7 | | * Min Chen <chenm003@163.com> |
8 | | * |
9 | | * This program is free software; you can redistribute it and/or modify |
10 | | * it under the terms of the GNU General Public License as published by |
11 | | * the Free Software Foundation; either version 2 of the License, or |
12 | | * (at your option) any later version. |
13 | | * |
14 | | * This program is distributed in the hope that it will be useful, |
15 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | | * GNU General Public License for more details. |
18 | | * |
19 | | * You should have received a copy of the GNU General Public License |
20 | | * along with this program; if not, write to the Free Software |
21 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. |
22 | | * |
23 | | * This program is also available under a commercial proprietary license. |
24 | | * For more information, contact us at license @ x265.com. |
25 | | *****************************************************************************/ |
26 | | |
27 | | #include "common.h" |
28 | | #include "frame.h" |
29 | | #include "picyuv.h" |
30 | | #include "lowres.h" |
31 | | #include "slice.h" |
32 | | #include "mv.h" |
33 | | #include "bitstream.h" |
34 | | #include "threading.h" |
35 | | |
36 | | using namespace X265_NS; |
37 | | namespace { |
/* Frame-level parameters shared by the weight analysis helpers in this file.
 * weightAnalyse() zeroes the structure and then fills in every member. */
struct Cache
{
    const int* intraCost;        /* per-CU costs; weightCost() caps each luma 8x8 SATD with them */
    int        numPredDir;       /* number of reference lists analysed: 1 for P slices, otherwise 2 */
    int        csp;              /* chroma format, used to index primitives.chroma[] */
    int        hshift;           /* horizontal chroma shift for csp */
    int        vshift;           /* vertical chroma shift for csp */
    int        lowresWidthInCU;  /* lowres width >> 3 */
    int        lowresHeightInCU; /* lowres line count >> 3 */
};
48 | | |
49 | | int sliceHeaderCost(WeightParam *w, int lambda, int bChroma) |
50 | 0 | { |
51 | | /* 4 times higher, because chroma is analyzed at full resolution. */ |
52 | 0 | if (bChroma) |
53 | 0 | lambda *= 4; |
54 | 0 | int denomCost = bs_size_ue(w[0].log2WeightDenom) * (2 - bChroma); |
55 | 0 | return lambda * (10 + denomCost + 2 * (bs_size_se(w[0].inputWeight) + bs_size_se(w[0].inputOffset))); |
56 | 0 | } |
57 | | |
58 | | /* make a motion compensated copy of lowres ref into mcout with the same stride. |
59 | | * The borders of mcout are not extended */ |
60 | | void mcLuma(pixel* mcout, Lowres& ref, const MV * mvs) |
61 | 0 | { |
62 | 0 | intptr_t stride = ref.lumaStride; |
63 | 0 | const int mvshift = 1 << 2; |
64 | 0 | const int cuSize = 8; |
65 | 0 | MV mvmin, mvmax; |
66 | |
|
67 | 0 | int cu = 0; |
68 | |
|
69 | 0 | for (int y = 0; y < ref.lines; y += cuSize) |
70 | 0 | { |
71 | 0 | intptr_t pixoff = y * stride; |
72 | 0 | mvmin.y = (int32_t)((-y - 8) * mvshift); |
73 | 0 | mvmax.y = (int32_t)((ref.lines - y - 1 + 8) * mvshift); |
74 | |
|
75 | 0 | for (int x = 0; x < ref.width; x += cuSize, pixoff += cuSize, cu++) |
76 | 0 | { |
77 | 0 | ALIGN_VAR_16(pixel, buf8x8[8 * 8]); |
78 | 0 | intptr_t bstride = 8; |
79 | 0 | mvmin.x = (int32_t)((-x - 8) * mvshift); |
80 | 0 | mvmax.x = (int32_t)((ref.width - x - 1 + 8) * mvshift); |
81 | | |
82 | | /* clip MV to available pixels */ |
83 | 0 | MV mv = mvs[cu]; |
84 | 0 | mv = mv.clipped(mvmin, mvmax); |
85 | 0 | pixel *tmp = ref.lowresMC(pixoff, mv, buf8x8, bstride, 0); |
86 | 0 | primitives.cu[BLOCK_8x8].copy_pp(mcout + pixoff, stride, tmp, bstride); |
87 | 0 | } |
88 | 0 | } |
89 | 0 | } |
90 | | |
91 | | /* use lowres MVs from lookahead to generate a motion compensated chroma plane. |
92 | | * if a block had cheaper lowres cost as intra, we treat it as MV 0 */ |
93 | | void mcChroma(pixel * mcout, |
94 | | pixel * src, |
95 | | intptr_t stride, |
96 | | const MV * mvs, |
97 | | const Cache& cache, |
98 | | int height, |
99 | | int width) |
100 | 0 | { |
101 | | /* the motion vectors correspond to 8x8 lowres luma blocks, or 16x16 fullres |
102 | | * luma blocks. We have to adapt block size to chroma csp */ |
103 | 0 | int csp = cache.csp; |
104 | 0 | int bw = 16 >> cache.hshift; |
105 | 0 | int bh = 16 >> cache.vshift; |
106 | 0 | const int mvshift = 1 << 2; |
107 | 0 | MV mvmin, mvmax; |
108 | |
|
109 | 0 | for (int y = 0; y < height; y += bh) |
110 | 0 | { |
111 | | /* note: lowres block count per row might be different from chroma block |
112 | | * count per row because of rounding issues, so be very careful with indexing |
113 | | * into the lowres structures */ |
114 | 0 | int cu = y * cache.lowresWidthInCU; |
115 | 0 | intptr_t pixoff = y * stride; |
116 | 0 | mvmin.y = (int32_t)((-y - 8) * mvshift); |
117 | 0 | mvmax.y = (int32_t)((height - y - 1 + 8) * mvshift); |
118 | |
|
119 | 0 | for (int x = 0; x < width; x += bw, cu++, pixoff += bw) |
120 | 0 | { |
121 | 0 | if (x < cache.lowresWidthInCU && y < cache.lowresHeightInCU) |
122 | 0 | { |
123 | 0 | MV mv = mvs[cu]; // lowres MV |
124 | 0 | mv <<= 1; // fullres MV |
125 | 0 | mv.x >>= cache.hshift; |
126 | 0 | mv.y >>= cache.vshift; |
127 | | |
128 | | /* clip MV to available pixels */ |
129 | 0 | mvmin.x = (int32_t)((-x - 8) * mvshift); |
130 | 0 | mvmax.x = (int32_t)((width - x - 1 + 8) * mvshift); |
131 | 0 | mv = mv.clipped(mvmin, mvmax); |
132 | |
|
133 | 0 | intptr_t fpeloffset = (mv.y >> 2) * stride + (mv.x >> 2); |
134 | 0 | pixel *temp = src + pixoff + fpeloffset; |
135 | |
|
136 | 0 | int xFrac = mv.x & 7; |
137 | 0 | int yFrac = mv.y & 7; |
138 | 0 | if (!(yFrac | xFrac)) |
139 | 0 | { |
140 | 0 | primitives.chroma[csp].pu[LUMA_16x16].copy_pp(mcout + pixoff, stride, temp, stride); |
141 | 0 | } |
142 | 0 | else if (!yFrac) |
143 | 0 | { |
144 | 0 | primitives.chroma[csp].pu[LUMA_16x16].filter_hpp(temp, stride, mcout + pixoff, stride, xFrac); |
145 | 0 | } |
146 | 0 | else if (!xFrac) |
147 | 0 | { |
148 | 0 | primitives.chroma[csp].pu[LUMA_16x16].filter_vpp(temp, stride, mcout + pixoff, stride, yFrac); |
149 | 0 | } |
150 | 0 | else |
151 | 0 | { |
152 | 0 | ALIGN_VAR_16(int16_t, immed[16 * (16 + NTAPS_CHROMA - 1)]); |
153 | 0 | primitives.chroma[csp].pu[LUMA_16x16].filter_hps(temp, stride, immed, bw, xFrac, 1); |
154 | 0 | primitives.chroma[csp].pu[LUMA_16x16].filter_vsp(immed + ((NTAPS_CHROMA >> 1) - 1) * bw, bw, mcout + pixoff, stride, yFrac); |
155 | 0 | } |
156 | 0 | } |
157 | 0 | else |
158 | 0 | { |
159 | 0 | primitives.chroma[csp].pu[LUMA_16x16].copy_pp(mcout + pixoff, stride, src + pixoff, stride); |
160 | 0 | } |
161 | 0 | } |
162 | 0 | } |
163 | 0 | } |
164 | | |
165 | | /* Measure sum of 8x8 satd costs between source frame and reference |
166 | | * frame (potentially weighted, potentially motion compensated). We |
167 | | * always use source images for this analysis since reference recon |
168 | | * pixels have unreliable availability */ |
169 | | uint32_t weightCost(pixel * fenc, |
170 | | pixel * ref, |
171 | | pixel * weightTemp, |
172 | | intptr_t stride, |
173 | | const Cache & cache, |
174 | | int width, |
175 | | int height, |
176 | | WeightParam * w, |
177 | | bool bLuma) |
178 | 0 | { |
179 | 0 | if (w) |
180 | 0 | { |
181 | | /* make a weighted copy of the reference plane */ |
182 | 0 | int offset = w->inputOffset << (X265_DEPTH - 8); |
183 | 0 | int weight = w->inputWeight; |
184 | 0 | int denom = w->log2WeightDenom; |
185 | 0 | int round = denom ? 1 << (denom - 1) : 0; |
186 | 0 | int correction = IF_INTERNAL_PREC - X265_DEPTH; /* intermediate interpolation depth */ |
187 | 0 | int pwidth = ((width + 31) >> 5) << 5; |
188 | 0 | primitives.weight_pp(ref, weightTemp, stride, pwidth, height, |
189 | 0 | weight, round << correction, denom + correction, offset); |
190 | 0 | ref = weightTemp; |
191 | 0 | } |
192 | |
|
193 | 0 | uint32_t cost = 0; |
194 | 0 | pixel *f = fenc, *r = ref; |
195 | |
|
196 | 0 | if (bLuma) |
197 | 0 | { |
198 | 0 | int cu = 0; |
199 | 0 | for (int y = 0; y < height; y += 8, r += 8 * stride, f += 8 * stride) |
200 | 0 | { |
201 | 0 | for (int x = 0; x < width; x += 8, cu++) |
202 | 0 | { |
203 | 0 | int cmp = primitives.pu[LUMA_8x8].satd(r + x, stride, f + x, stride); |
204 | 0 | cost += X265_MIN(cmp, cache.intraCost[cu]); |
205 | 0 | } |
206 | 0 | } |
207 | 0 | } |
208 | 0 | else if (cache.csp == X265_CSP_I444) |
209 | 0 | for (int y = 0; y < height; y += 16, r += 16 * stride, f += 16 * stride) |
210 | 0 | for (int x = 0; x < width; x += 16) |
211 | 0 | cost += primitives.pu[LUMA_16x16].satd(r + x, stride, f + x, stride); |
212 | 0 | else |
213 | 0 | for (int y = 0; y < height; y += 8, r += 8 * stride, f += 8 * stride) |
214 | 0 | for (int x = 0; x < width; x += 8) |
215 | 0 | cost += primitives.pu[LUMA_8x8].satd(r + x, stride, f + x, stride); |
216 | |
|
217 | 0 | return cost; |
218 | 0 | } |
219 | | } |
220 | | |
221 | | namespace X265_NS { |
222 | | void weightAnalyse(Slice& slice, Frame& frame, x265_param& param) |
223 | 0 | { |
224 | 0 | WeightParam wp[2][MAX_NUM_REF][3]; |
225 | 0 | PicYuv *fencPic = frame.m_fencPic; |
226 | 0 | Lowres& fenc = frame.m_lowres; |
227 | |
|
228 | 0 | Cache cache; |
229 | |
|
230 | 0 | memset(&cache, 0, sizeof(cache)); |
231 | 0 | cache.intraCost = fenc.intraCost; |
232 | 0 | cache.numPredDir = slice.isInterP() ? 1 : 2; |
233 | 0 | cache.lowresWidthInCU = fenc.width >> 3; |
234 | 0 | cache.lowresHeightInCU = fenc.lines >> 3; |
235 | 0 | cache.csp = param.internalCsp; |
236 | 0 | cache.hshift = CHROMA_H_SHIFT(cache.csp); |
237 | 0 | cache.vshift = CHROMA_V_SHIFT(cache.csp); |
238 | | |
239 | | /* Use single allocation for motion compensated ref and weight buffers */ |
240 | 0 | pixel *mcbuf = X265_MALLOC(pixel, 2 * fencPic->m_stride * fencPic->m_picHeight); |
241 | 0 | if (!mcbuf) |
242 | 0 | { |
243 | 0 | slice.disableWeights(); |
244 | 0 | return; |
245 | 0 | } |
246 | 0 | pixel *weightTemp = mcbuf + fencPic->m_stride * fencPic->m_picHeight; |
247 | |
|
248 | 0 | int lambda = (int)x265_lambda_tab[X265_LOOKAHEAD_QP]; |
249 | 0 | int curPoc = slice.m_poc; |
250 | 0 | const float epsilon = 1.f / 128.f; |
251 | |
|
252 | 0 | int chromaDenom, lumaDenom, denom; |
253 | 0 | chromaDenom = lumaDenom = 7; |
254 | 0 | int numpixels[3]; |
255 | 0 | int w16 = ((fencPic->m_picWidth + 15) >> 4) << 4; |
256 | 0 | int h16 = ((fencPic->m_picHeight + 15) >> 4) << 4; |
257 | 0 | numpixels[0] = w16 * h16; |
258 | 0 | numpixels[1] = numpixels[2] = numpixels[0] >> (cache.hshift + cache.vshift); |
259 | |
|
260 | 0 | for (int list = 0; list < cache.numPredDir; list++) |
261 | 0 | { |
262 | 0 | WeightParam *weights = wp[list][0]; |
263 | 0 | Frame *refFrame = slice.m_refFrameList[list][0]; |
264 | 0 | Lowres& refLowres = refFrame->m_lowres; |
265 | 0 | int diffPoc = abs(curPoc - refFrame->m_poc); |
266 | | |
267 | | /* prepare estimates */ |
268 | 0 | float guessScale[3], fencMean[3], refMean[3]; |
269 | 0 | for (int plane = 0; plane < (param.internalCsp != X265_CSP_I400 ? 3 : 1); plane++) |
270 | 0 | { |
271 | 0 | SET_WEIGHT(weights[plane], false, 1, 0, 0); |
272 | 0 | uint64_t fencVar = fenc.wp_ssd[plane] + !refLowres.wp_ssd[plane]; |
273 | 0 | uint64_t refVar = refLowres.wp_ssd[plane] + !refLowres.wp_ssd[plane]; |
274 | 0 | guessScale[plane] = sqrt((float)fencVar / refVar); |
275 | 0 | fencMean[plane] = (float)fenc.wp_sum[plane] / (numpixels[plane]) / (1 << (X265_DEPTH - 8)); |
276 | 0 | refMean[plane] = (float)refLowres.wp_sum[plane] / (numpixels[plane]) / (1 << (X265_DEPTH - 8)); |
277 | 0 | } |
278 | | |
279 | | /* make sure both our scale factors fit */ |
280 | 0 | while (!list && chromaDenom > 0) |
281 | 0 | { |
282 | 0 | float thresh = 127.f / (1 << chromaDenom); |
283 | 0 | if (guessScale[1] < thresh && guessScale[2] < thresh) |
284 | 0 | break; |
285 | 0 | chromaDenom--; |
286 | 0 | } |
287 | |
|
288 | 0 | SET_WEIGHT(weights[1], false, 1 << chromaDenom, chromaDenom, 0); |
289 | 0 | SET_WEIGHT(weights[2], false, 1 << chromaDenom, chromaDenom, 0); |
290 | |
|
291 | 0 | MV *mvs = NULL; |
292 | |
|
293 | 0 | for (int plane = 0; plane < (param.internalCsp != X265_CSP_I400 ? 3 : 1); plane++) |
294 | 0 | { |
295 | 0 | denom = plane ? chromaDenom : lumaDenom; |
296 | 0 | if (plane && !weights[0].wtPresent) |
297 | 0 | break; |
298 | | |
299 | | /* Early termination */ |
300 | 0 | x265_emms(); |
301 | 0 | if (fabsf(refMean[plane] - fencMean[plane]) < 0.5f && fabsf(1.f - guessScale[plane]) < epsilon) |
302 | 0 | { |
303 | 0 | SET_WEIGHT(weights[plane], 0, 1 << denom, denom, 0); |
304 | 0 | continue; |
305 | 0 | } |
306 | | |
307 | 0 | if (plane) |
308 | 0 | { |
309 | 0 | int scale = x265_clip3(0, 255, (int)(guessScale[plane] * (1 << denom) + 0.5f)); |
310 | 0 | if (scale > 127) |
311 | 0 | continue; |
312 | 0 | weights[plane].inputWeight = scale; |
313 | 0 | } |
314 | 0 | else |
315 | 0 | { |
316 | 0 | weights[plane].setFromWeightAndOffset((int)(guessScale[plane] * (1 << denom) + 0.5f), 0, denom, !list); |
317 | 0 | } |
318 | | |
319 | 0 | int mindenom = weights[plane].log2WeightDenom; |
320 | 0 | int minscale = weights[plane].inputWeight; |
321 | 0 | int minoff = 0; |
322 | |
|
323 | 0 | if (!plane && diffPoc <= param.bframes + 1) |
324 | 0 | { |
325 | 0 | mvs = fenc.lowresMvs[list][diffPoc]; |
326 | | |
327 | | /* test whether this motion search was performed by lookahead */ |
328 | 0 | if (mvs[0].x != 0x7FFF) |
329 | 0 | { |
330 | | /* reference chroma planes must be extended prior to being |
331 | | * used as motion compensation sources */ |
332 | 0 | if (!refFrame->m_bChromaExtended && param.internalCsp != X265_CSP_I400 && frame.m_fencPic->m_picCsp != X265_CSP_I400) |
333 | 0 | { |
334 | 0 | refFrame->m_bChromaExtended = true; |
335 | 0 | PicYuv *refPic = refFrame->m_fencPic; |
336 | 0 | int width = refPic->m_picWidth >> cache.hshift; |
337 | 0 | int height = refPic->m_picHeight >> cache.vshift; |
338 | 0 | extendPicBorder(refPic->m_picOrg[1], refPic->m_strideC, width, height, refPic->m_chromaMarginX, refPic->m_chromaMarginY); |
339 | 0 | extendPicBorder(refPic->m_picOrg[2], refPic->m_strideC, width, height, refPic->m_chromaMarginX, refPic->m_chromaMarginY); |
340 | 0 | } |
341 | 0 | } |
342 | 0 | else |
343 | 0 | mvs = 0; |
344 | 0 | } |
345 | | |
346 | | /* prepare inputs to weight analysis */ |
347 | 0 | pixel *orig; |
348 | 0 | pixel *fref; |
349 | 0 | intptr_t stride; |
350 | 0 | int width, height; |
351 | 0 | switch (plane) |
352 | 0 | { |
353 | 0 | case 0: |
354 | 0 | orig = fenc.lowresPlane[0]; |
355 | 0 | stride = fenc.lumaStride; |
356 | 0 | width = fenc.width; |
357 | 0 | height = fenc.lines; |
358 | 0 | fref = refLowres.lowresPlane[0]; |
359 | 0 | if (mvs) |
360 | 0 | { |
361 | 0 | mcLuma(mcbuf, refLowres, mvs); |
362 | 0 | fref = mcbuf; |
363 | 0 | } |
364 | 0 | break; |
365 | | |
366 | 0 | case 1: |
367 | 0 | orig = fencPic->m_picOrg[1]; |
368 | 0 | stride = fencPic->m_strideC; |
369 | 0 | fref = refFrame->m_fencPic->m_picOrg[1]; |
370 | | |
371 | | /* Clamp the chroma dimensions to the nearest multiple of |
372 | | * 8x8 blocks (or 16x16 for 4:4:4) since mcChroma uses lowres |
373 | | * blocks and weightCost measures 8x8 blocks. This |
374 | | * potentially ignores some edge pixels, but simplifies the |
375 | | * logic and prevents reading uninitialized pixels. Lowres |
376 | | * planes are border extended and require no clamping. */ |
377 | 0 | width = ((fencPic->m_picWidth >> 4) << 4) >> cache.hshift; |
378 | 0 | height = ((fencPic->m_picHeight >> 4) << 4) >> cache.vshift; |
379 | 0 | if (mvs) |
380 | 0 | { |
381 | 0 | mcChroma(mcbuf, fref, stride, mvs, cache, height, width); |
382 | 0 | fref = mcbuf; |
383 | 0 | } |
384 | 0 | break; |
385 | | |
386 | 0 | case 2: |
387 | 0 | orig = fencPic->m_picOrg[2]; |
388 | 0 | stride = fencPic->m_strideC; |
389 | 0 | fref = refFrame->m_fencPic->m_picOrg[2]; |
390 | 0 | width = ((fencPic->m_picWidth >> 4) << 4) >> cache.hshift; |
391 | 0 | height = ((fencPic->m_picHeight >> 4) << 4) >> cache.vshift; |
392 | 0 | if (mvs) |
393 | 0 | { |
394 | 0 | mcChroma(mcbuf, fref, stride, mvs, cache, height, width); |
395 | 0 | fref = mcbuf; |
396 | 0 | } |
397 | 0 | break; |
398 | | |
399 | 0 | default: |
400 | 0 | slice.disableWeights(); |
401 | 0 | X265_FREE(mcbuf); |
402 | 0 | return; |
403 | 0 | } |
404 | | |
405 | 0 | uint32_t origscore = weightCost(orig, fref, weightTemp, stride, cache, width, height, NULL, !plane); |
406 | 0 | if (!origscore) |
407 | 0 | { |
408 | 0 | SET_WEIGHT(weights[plane], 0, 1 << denom, denom, 0); |
409 | 0 | continue; |
410 | 0 | } |
411 | | |
412 | 0 | uint32_t minscore = origscore; |
413 | 0 | bool bFound = false; |
414 | | |
415 | | /* x264 uses a table lookup here, selecting search range based on preset */ |
416 | 0 | static const int scaleDist = 4; |
417 | 0 | static const int offsetDist = 2; |
418 | |
|
419 | 0 | int startScale = x265_clip3(0, 127, minscale - scaleDist); |
420 | 0 | int endScale = x265_clip3(0, 127, minscale + scaleDist); |
421 | 0 | for (int scale = startScale; scale <= endScale; scale++) |
422 | 0 | { |
423 | 0 | int deltaWeight = scale - (1 << mindenom); |
424 | 0 | if (deltaWeight > 127 || deltaWeight <= -128) |
425 | 0 | continue; |
426 | | |
427 | 0 | x265_emms(); |
428 | 0 | int curScale = scale; |
429 | 0 | int curOffset = (int)(fencMean[plane] - refMean[plane] * curScale / (1 << mindenom) + 0.5f); |
430 | 0 | if (curOffset < -128 || curOffset > 127) |
431 | 0 | { |
432 | | /* Rescale considering the constraints on curOffset. We do it in this order |
433 | | * because scale has a much wider range than offset (because of denom), so |
434 | | * it should almost never need to be clamped. */ |
435 | 0 | curOffset = x265_clip3(-128, 127, curOffset); |
436 | 0 | curScale = (int)((1 << mindenom) * (fencMean[plane] - curOffset) / refMean[plane] + 0.5f); |
437 | 0 | curScale = x265_clip3(0, 127, curScale); |
438 | 0 | } |
439 | |
|
440 | 0 | int startOffset = x265_clip3(-128, 127, curOffset - offsetDist); |
441 | 0 | int endOffset = x265_clip3(-128, 127, curOffset + offsetDist); |
442 | 0 | for (int off = startOffset; off <= endOffset; off++) |
443 | 0 | { |
444 | 0 | WeightParam wsp; |
445 | 0 | SET_WEIGHT(wsp, true, curScale, mindenom, off); |
446 | 0 | uint32_t s = weightCost(orig, fref, weightTemp, stride, cache, width, height, &wsp, !plane) + |
447 | 0 | sliceHeaderCost(&wsp, lambda, !!plane); |
448 | 0 | COPY4_IF_LT(minscore, s, minscale, curScale, minoff, off, bFound, true); |
449 | | |
450 | | /* Don't check any more offsets if the previous one had a lower cost than the current one */ |
451 | 0 | if (minoff == startOffset && off != startOffset) |
452 | 0 | break; |
453 | 0 | } |
454 | 0 | } |
455 | | |
456 | | /* Use a smaller luma denominator if possible */ |
457 | 0 | if (!(plane || list)) |
458 | 0 | { |
459 | 0 | if (mindenom > 0 && !(minscale & 1)) |
460 | 0 | { |
461 | 0 | unsigned long idx; |
462 | 0 | CTZ(idx, minscale); |
463 | 0 | int shift = X265_MIN((int)idx, mindenom); |
464 | 0 | mindenom -= shift; |
465 | 0 | minscale >>= shift; |
466 | 0 | } |
467 | 0 | } |
468 | |
|
469 | 0 | if (!bFound || (minscale == (1 << mindenom) && minoff == 0) || (float)minscore / origscore > 0.998f) |
470 | 0 | { |
471 | 0 | SET_WEIGHT(weights[plane], false, 1 << denom, denom, 0); |
472 | 0 | } |
473 | 0 | else |
474 | 0 | { |
475 | 0 | SET_WEIGHT(weights[plane], true, minscale, mindenom, minoff); |
476 | 0 | } |
477 | 0 | } |
478 | | |
479 | 0 | if (weights[0].wtPresent) |
480 | 0 | { |
481 | | // Make sure both chroma channels match |
482 | 0 | if (weights[1].wtPresent != weights[2].wtPresent) |
483 | 0 | { |
484 | 0 | if (weights[1].wtPresent) |
485 | 0 | weights[2] = weights[1]; |
486 | 0 | else |
487 | 0 | weights[1] = weights[2]; |
488 | 0 | } |
489 | 0 | } |
490 | |
|
491 | 0 | lumaDenom = weights[0].log2WeightDenom; |
492 | 0 | chromaDenom = weights[1].log2WeightDenom; |
493 | | |
494 | | /* reset weight states */ |
495 | 0 | for (int ref = 1; ref < slice.m_numRefIdx[list]; ref++) |
496 | 0 | { |
497 | 0 | SET_WEIGHT(wp[list][ref][0], false, 1 << lumaDenom, lumaDenom, 0); |
498 | 0 | SET_WEIGHT(wp[list][ref][1], false, 1 << chromaDenom, chromaDenom, 0); |
499 | 0 | SET_WEIGHT(wp[list][ref][2], false, 1 << chromaDenom, chromaDenom, 0); |
500 | 0 | } |
501 | 0 | } |
502 | | |
503 | 0 | X265_FREE(mcbuf); |
504 | |
|
505 | 0 | memcpy(slice.m_weightPredTable, wp, sizeof(WeightParam) * 2 * MAX_NUM_REF * 3); |
506 | |
|
507 | 0 | if (param.logLevel >= X265_LOG_FULL) |
508 | 0 | { |
509 | 0 | char buf[1024]; |
510 | 0 | int p = 0; |
511 | 0 | bool bWeighted = false; |
512 | |
|
513 | 0 | p = sprintf(buf, "poc: %d weights:", slice.m_poc); |
514 | 0 | int numPredDir = slice.isInterP() ? 1 : 2; |
515 | 0 | for (int list = 0; list < numPredDir; list++) |
516 | 0 | { |
517 | 0 | WeightParam* w = &wp[list][0][0]; |
518 | 0 | if (w[0].wtPresent || w[1].wtPresent || w[2].wtPresent) |
519 | 0 | { |
520 | 0 | bWeighted = true; |
521 | 0 | p += sprintf(buf + p, " [L%d:R0 ", list); |
522 | 0 | if (w[0].wtPresent) |
523 | 0 | p += sprintf(buf + p, "Y{%d/%d%+d}", w[0].inputWeight, 1 << w[0].log2WeightDenom, w[0].inputOffset); |
524 | 0 | if (w[1].wtPresent) |
525 | 0 | p += sprintf(buf + p, "U{%d/%d%+d}", w[1].inputWeight, 1 << w[1].log2WeightDenom, w[1].inputOffset); |
526 | 0 | if (w[2].wtPresent) |
527 | 0 | p += sprintf(buf + p, "V{%d/%d%+d}", w[2].inputWeight, 1 << w[2].log2WeightDenom, w[2].inputOffset); |
528 | 0 | p += sprintf(buf + p, "]"); |
529 | 0 | } |
530 | 0 | } |
531 | |
|
532 | 0 | if (bWeighted) |
533 | 0 | { |
534 | 0 | if (p < 80) // pad with spaces to ensure progress line overwritten |
535 | 0 | sprintf(buf + p, "%*s", 80 - p, " "); |
536 | 0 | x265_log(¶m, X265_LOG_FULL, "%s\n", buf); |
537 | 0 | } |
538 | 0 | } |
539 | 0 | } |
540 | | } |