/src/libvpx/vp8/encoder/encodeframe.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 | | * |
4 | | * Use of this source code is governed by a BSD-style license |
5 | | * that can be found in the LICENSE file in the root of the source |
6 | | * tree. An additional intellectual property rights grant can be found |
7 | | * in the file PATENTS. All contributing project authors may |
8 | | * be found in the AUTHORS file in the root of the source tree. |
9 | | */ |
10 | | #include <limits.h> |
11 | | #include <stdio.h> |
12 | | |
13 | | #include "vpx_config.h" |
14 | | |
15 | | #include "vp8/common/common.h" |
16 | | #include "vp8/common/entropymode.h" |
17 | | #include "vp8/common/extend.h" |
18 | | #include "vp8/common/invtrans.h" |
19 | | #include "vp8/common/quant_common.h" |
20 | | #include "vp8/common/reconinter.h" |
21 | | #include "vp8/common/setupintrarecon.h" |
22 | | #include "vp8/common/threading.h" |
23 | | #include "vp8/encoder/bitstream.h" |
24 | | #include "vp8/encoder/encodeframe.h" |
25 | | #include "vp8/encoder/encodeintra.h" |
26 | | #include "vp8/encoder/encodemb.h" |
27 | | #include "vp8/encoder/onyx_int.h" |
28 | | #include "vp8/encoder/pickinter.h" |
29 | | #include "vp8/encoder/rdopt.h" |
30 | | #include "vp8_rtcd.h" |
31 | | #include "vpx/internal/vpx_codec_internal.h" |
32 | | #include "vpx_dsp_rtcd.h" |
33 | | #include "vpx_mem/vpx_mem.h" |
34 | | #include "vpx_ports/vpx_timer.h" |
35 | | |
36 | | #if CONFIG_MULTITHREAD |
37 | | #include "vp8/encoder/ethreading.h" |
38 | | #endif |
39 | | |
40 | | extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t); |
41 | | static void adjust_act_zbin(VP8_COMP *cpi, MACROBLOCK *x); |
42 | | |
43 | | #ifdef MODE_STATS |
44 | | unsigned int inter_y_modes[10] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; |
45 | | unsigned int inter_uv_modes[4] = { 0, 0, 0, 0 }; |
46 | | unsigned int inter_b_modes[15] = { |
47 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 |
48 | | }; |
49 | | unsigned int y_modes[5] = { 0, 0, 0, 0, 0 }; |
50 | | unsigned int uv_modes[4] = { 0, 0, 0, 0 }; |
51 | | unsigned int b_modes[14] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; |
52 | | #endif |
53 | | |
54 | | /* activity_avg must be positive, or flat regions could get a zero weight |
55 | | * (infinite lambda), which confounds analysis. |
56 | | * This also avoids the need for divide by zero checks in |
57 | | * vp8_activity_masking(). |
58 | | */ |
59 | 0 | #define VP8_ACTIVITY_AVG_MIN (64) |
60 | | |
61 | | /* This is used as a reference when computing the source variance for the |
62 | | * purposes of activity masking. |
63 | | * Eventually this should be replaced by custom no-reference routines, |
64 | | * which will be faster. |
65 | | */ |
66 | | static const unsigned char VP8_VAR_OFFS[16] = { 128, 128, 128, 128, 128, 128, |
67 | | 128, 128, 128, 128, 128, 128, |
68 | | 128, 128, 128, 128 }; |
69 | | |
70 | | /* Original activity measure from Tim T's code. */ |
71 | 0 | static unsigned int tt_activity_measure(MACROBLOCK *x) { |
72 | 0 | unsigned int act; |
73 | 0 | unsigned int sse; |
74 | 0 | /* TODO: This could also be done over smaller areas (8x8), but that would |
75 | 0 | * require extensive changes elsewhere, as lambda is assumed to be fixed |
76 | 0 | * over an entire MB in most of the code. |
77 | 0 | * Another option is to compute four 8x8 variances, and pick a single |
78 | 0 | * lambda using a non-linear combination (e.g., the smallest, or second |
79 | 0 | * smallest, etc.). |
80 | 0 | */ |
81 | 0 | act = vpx_variance16x16(x->src.y_buffer, x->src.y_stride, VP8_VAR_OFFS, 0, |
82 | 0 | &sse); |
83 | 0 | act = act << 4; |
84 | 0 | |
85 | 0 | /* If the region is flat, lower the activity some more. */ |
86 | 0 | if (act < 8 << 12) act = act < 5 << 12 ? act : 5 << 12; |
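 | | /* Note: act is now 16x the 16x16 variance, so 8 << 12 corresponds to |
 | | * a raw variance of 2048 and 5 << 12 to 1280; e.g. act == 6 << 12 is |
 | | * clamped down to 5 << 12 while act == 4 << 12 passes unchanged. */ |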
87 | 0 | |
88 | 0 | return act; |
89 | 0 | } |
90 | | |
91 | | /* Measure the activity of the current macroblock |
92 | | * Exactly what we measure is TBD, so it is abstracted into this function. |
93 | | */ |
94 | 0 | #define ALT_ACT_MEASURE 1 |
95 | 0 | static unsigned int mb_activity_measure(MACROBLOCK *x, int mb_row, int mb_col) { |
96 | 0 | unsigned int mb_activity; |
97 | | |
98 | 0 | if (ALT_ACT_MEASURE) { |
99 | 0 | int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row); |
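 | | /* use_dc_pred is true exactly for MBs on the top row or left column, |
 | | * excluding the corner MB at (0, 0), which has no coded neighbours. */ |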
100 | | |
101 | | /* Use the alternative measure: the MB's intra-coding prediction error. */ |
102 | 0 | mb_activity = vp8_encode_intra(x, use_dc_pred); |
103 | 0 | } else { |
104 | | /* Original activity measure from Tim T's code. */ |
105 | 0 | mb_activity = tt_activity_measure(x); |
106 | 0 | } |
107 | | |
108 | 0 | if (mb_activity < VP8_ACTIVITY_AVG_MIN) mb_activity = VP8_ACTIVITY_AVG_MIN; |
109 | | |
110 | 0 | return mb_activity; |
111 | 0 | } |
112 | | |
113 | | /* Calculate an "average" mb activity value for the frame */ |
114 | | #define ACT_MEDIAN 0 |
115 | 0 | static void calc_av_activity(VP8_COMP *cpi, int64_t activity_sum) { |
116 | | #if ACT_MEDIAN |
117 | | /* Find median: Simple n^2 algorithm for experimentation */ |
118 | | { |
119 | | unsigned int median; |
120 | | unsigned int i, j; |
121 | | unsigned int *sortlist; |
122 | | unsigned int tmp; |
123 | | |
124 | | /* Create a list to sort into */ |
125 | | CHECK_MEM_ERROR(&cpi->common.error, sortlist, |
126 | | vpx_calloc(sizeof(unsigned int), cpi->common.MBs)); |
127 | | |
128 | | /* Copy map to sort list */ |
129 | | memcpy(sortlist, cpi->mb_activity_map, |
130 | | sizeof(unsigned int) * cpi->common.MBs); |
131 | | |
132 | | /* Ripple each value down to its correct position */ |
133 | | for (i = 1; i < cpi->common.MBs; ++i) { |
134 | | for (j = i; j > 0; j--) { |
135 | | if (sortlist[j] < sortlist[j - 1]) { |
136 | | /* Swap values */ |
137 | | tmp = sortlist[j - 1]; |
138 | | sortlist[j - 1] = sortlist[j]; |
139 | | sortlist[j] = tmp; |
140 | | } else |
141 | | break; |
142 | | } |
143 | | } |
144 | | |
145 | | /* Even number of MBs, so estimate the median as the mean of the two middle values. */ |
146 | | median = (1 + sortlist[cpi->common.MBs >> 1] + |
147 | | sortlist[(cpi->common.MBs >> 1) - 1]) >> |
148 | | 1; |
149 | | |
150 | | cpi->activity_avg = median; |
151 | | |
152 | | vpx_free(sortlist); |
153 | | } |
154 | | #else |
155 | | /* Simple mean for now */ |
156 | 0 | cpi->activity_avg = (unsigned int)(activity_sum / cpi->common.MBs); |
157 | 0 | #endif |
158 | | |
159 | 0 | if (cpi->activity_avg < VP8_ACTIVITY_AVG_MIN) { |
160 | 0 | cpi->activity_avg = VP8_ACTIVITY_AVG_MIN; |
161 | 0 | } |
162 | | |
163 | | /* Experimental code: return fixed value normalized for several clips */ |
164 | 0 | if (ALT_ACT_MEASURE) cpi->activity_avg = 100000; |
165 | 0 | } |
166 | | |
167 | | #define USE_ACT_INDEX 0 |
168 | | #define OUTPUT_NORM_ACT_STATS 0 |
169 | | |
170 | | #if USE_ACT_INDEX |
171 | | /* Calculate an activity index for each mb */ |
172 | | static void calc_activity_index(VP8_COMP *cpi, MACROBLOCK *x) { |
173 | | VP8_COMMON *const cm = &cpi->common; |
174 | | int mb_row, mb_col; |
175 | | |
176 | | int64_t act; |
177 | | int64_t a; |
178 | | int64_t b; |
179 | | |
180 | | #if OUTPUT_NORM_ACT_STATS |
181 | | FILE *f = fopen("norm_act.stt", "a"); |
182 | | fprintf(f, "\n%12d\n", cpi->activity_avg); |
183 | | #endif |
184 | | |
185 | | /* Reset pointers to start of activity map */ |
186 | | x->mb_activity_ptr = cpi->mb_activity_map; |
187 | | |
188 | | /* Calculate normalized mb activity number. */ |
189 | | for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) { |
190 | | /* for each macroblock col in image */ |
191 | | for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) { |
192 | | /* Read activity from the map */ |
193 | | act = *(x->mb_activity_ptr); |
194 | | |
195 | | /* Calculate a normalized activity number */ |
196 | | a = act + 4 * cpi->activity_avg; |
197 | | b = 4 * act + cpi->activity_avg; |
198 | | |
199 | | if (b >= a) |
200 | | *(x->activity_ptr) = (int)((b + (a >> 1)) / a) - 1; |
201 | | else |
202 | | *(x->activity_ptr) = 1 - (int)((a + (b >> 1)) / b); |
203 | | |
204 | | #if OUTPUT_NORM_ACT_STATS |
205 | | fprintf(f, " %6d", *(x->mb_activity_ptr)); |
206 | | #endif |
207 | | /* Increment activity map pointers */ |
208 | | x->mb_activity_ptr++; |
209 | | } |
210 | | |
211 | | #if OUTPUT_NORM_ACT_STATS |
212 | | fprintf(f, "\n"); |
213 | | #endif |
214 | | } |
215 | | |
216 | | #if OUTPUT_NORM_ACT_STATS |
217 | | fclose(f); |
218 | | #endif |
219 | | } |
220 | | #endif |
221 | | |
222 | | /* Loop through all MBs: record the activity of each, compute the |
223 | | * frame-average activity, and derive a normalized activity for each. |
224 | | */ |
225 | 0 | static void build_activity_map(VP8_COMP *cpi) { |
226 | 0 | MACROBLOCK *const x = &cpi->mb; |
227 | 0 | MACROBLOCKD *xd = &x->e_mbd; |
228 | 0 | VP8_COMMON *const cm = &cpi->common; |
229 | | |
230 | 0 | #if ALT_ACT_MEASURE |
231 | 0 | YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx]; |
232 | 0 | int recon_yoffset; |
233 | 0 | int recon_y_stride = new_yv12->y_stride; |
234 | 0 | #endif |
235 | | |
236 | 0 | int mb_row, mb_col; |
237 | 0 | unsigned int mb_activity; |
238 | 0 | int64_t activity_sum = 0; |
239 | | |
240 | | /* for each macroblock row in image */ |
241 | 0 | for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) { |
242 | 0 | #if ALT_ACT_MEASURE |
243 | | /* reset above block coeffs */ |
244 | 0 | xd->up_available = (mb_row != 0); |
245 | 0 | recon_yoffset = (mb_row * recon_y_stride * 16); |
246 | 0 | #endif |
247 | | /* for each macroblock col in image */ |
248 | 0 | for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) { |
249 | 0 | #if ALT_ACT_MEASURE |
250 | 0 | xd->dst.y_buffer = new_yv12->y_buffer + recon_yoffset; |
251 | 0 | xd->left_available = (mb_col != 0); |
252 | 0 | recon_yoffset += 16; |
253 | 0 | #endif |
254 | | /* Copy current mb to a buffer */ |
255 | 0 | vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16); |
256 | | |
257 | | /* measure activity */ |
258 | 0 | mb_activity = mb_activity_measure(x, mb_row, mb_col); |
259 | | |
260 | | /* Keep frame sum */ |
261 | 0 | activity_sum += mb_activity; |
262 | | |
263 | | /* Store MB level activity details. */ |
264 | 0 | *x->mb_activity_ptr = mb_activity; |
265 | | |
266 | | /* Increment activity map pointer */ |
267 | 0 | x->mb_activity_ptr++; |
268 | | |
269 | | /* adjust to the next column of source macroblocks */ |
270 | 0 | x->src.y_buffer += 16; |
271 | 0 | } |
272 | | |
273 | | /* adjust to the next row of mbs */ |
274 | 0 | x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols; |
275 | | |
276 | 0 | #if ALT_ACT_MEASURE |
277 | | /* extend the recon for intra prediction */ |
278 | 0 | vp8_extend_mb_row(new_yv12, xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, |
279 | 0 | xd->dst.v_buffer + 8); |
280 | 0 | #endif |
281 | 0 | } |
282 | | |
283 | | /* Calculate an "average" MB activity */ |
284 | 0 | calc_av_activity(cpi, activity_sum); |
285 | | |
286 | | #if USE_ACT_INDEX |
287 | | /* Calculate an activity index number of each mb */ |
288 | | calc_activity_index(cpi, x); |
289 | | #endif |
290 | 0 | } |
291 | | |
292 | | /* Macroblock activity masking */ |
293 | 0 | void vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x) { |
294 | | #if USE_ACT_INDEX |
295 | | x->rdmult += *(x->mb_activity_ptr) * (x->rdmult >> 2); |
296 | | x->errorperbit = x->rdmult * 100 / (110 * x->rddiv); |
297 | | x->errorperbit += (x->errorperbit == 0); |
298 | | #else |
299 | 0 | int64_t a; |
300 | 0 | int64_t b; |
301 | 0 | int64_t act = *(x->mb_activity_ptr); |
302 | | |
303 | | /* Apply the masking to the RD multiplier. */ |
304 | 0 | a = act + (2 * cpi->activity_avg); |
305 | 0 | b = (2 * act) + cpi->activity_avg; |
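 | | /* b/a is a smoothed act / activity_avg ratio bounded to (1/2, 2): |
 | | * act == activity_avg leaves rdmult unchanged, act == 0 halves it, |
 | | * and a very large act approaches a 2x increase. */ |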
306 | 0 | |
307 | 0 | x->rdmult = (unsigned int)(((int64_t)x->rdmult * b + (a >> 1)) / a); |
308 | 0 | x->errorperbit = x->rdmult * 100 / (110 * x->rddiv); |
309 | 0 | x->errorperbit += (x->errorperbit == 0); |
310 | 0 | #endif |
311 | | |
312 | | /* Activity based Zbin adjustment */ |
313 | 0 | adjust_act_zbin(cpi, x); |
314 | 0 | } |
315 | | |
316 | | static void encode_mb_row(VP8_COMP *cpi, VP8_COMMON *cm, int mb_row, |
317 | | MACROBLOCK *x, MACROBLOCKD *xd, TOKENEXTRA **tp, |
318 | 328k | int *segment_counts, int *totalrate) { |
319 | 328k | int recon_yoffset, recon_uvoffset; |
320 | 328k | int mb_col; |
321 | 328k | int ref_fb_idx = cm->lst_fb_idx; |
322 | 328k | int dst_fb_idx = cm->new_fb_idx; |
323 | 328k | int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride; |
324 | 328k | int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride; |
325 | 328k | int map_index = (mb_row * cpi->common.mb_cols); |
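 | | /* map_index is the offset of this row's first MB in the frame's |
 | | * raster-order per-MB maps (segmentation_map, active_map, |
 | | * consec_zero_last, the activity map, etc.). */ |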
326 | | |
327 | | #if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) |
328 | | const int num_part = (1 << cm->multi_token_partition); |
329 | | TOKENEXTRA *tp_start = cpi->tok; |
330 | | vp8_writer *w; |
331 | | #endif |
332 | | |
333 | 328k | #if CONFIG_MULTITHREAD |
334 | 328k | const int nsync = cpi->mt_sync_range; |
335 | 328k | vpx_atomic_int rightmost_col = VPX_ATOMIC_INIT(cm->mb_cols + nsync); |
336 | 328k | const vpx_atomic_int *last_row_current_mb_col; |
337 | 328k | vpx_atomic_int *current_mb_col = NULL; |
338 | | |
339 | 328k | if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) != 0) { |
340 | 0 | current_mb_col = &cpi->mt_current_mb_col[mb_row]; |
341 | 0 | } |
342 | 328k | if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) != 0 && mb_row != 0) { |
343 | 0 | last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1]; |
344 | 328k | } else { |
345 | 328k | last_row_current_mb_col = &rightmost_col; |
346 | 328k | } |
347 | 328k | #endif |
348 | | |
349 | | #if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) |
350 | | if (num_part > 1) |
351 | | w = &cpi->bc[1 + (mb_row % num_part)]; |
352 | | else |
353 | | w = &cpi->bc[1]; |
354 | | #endif |
355 | | |
356 | | /* reset above block coeffs */ |
357 | 328k | xd->above_context = cm->above_context; |
358 | | |
359 | 328k | xd->up_available = (mb_row != 0); |
360 | 328k | recon_yoffset = (mb_row * recon_y_stride * 16); |
361 | 328k | recon_uvoffset = (mb_row * recon_uv_stride * 8); |
362 | | |
363 | 328k | cpi->tplist[mb_row].start = *tp; |
364 | | /* printf("Main mb_row = %d\n", mb_row); */ |
365 | | |
366 | | /* Distance of Mb to the top & bottom edges, specified in 1/8th pel |
367 | | * units as they are always compared to values that are in 1/8th pel |
368 | | */ |
369 | 328k | xd->mb_to_top_edge = -((mb_row * 16) << 3); |
370 | 328k | xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3; |
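 | | /* e.g. for mb_row == 2: mb_to_top_edge == -((2 * 16) << 3) == -256 |
 | | * eighth-pels, i.e. the MB starts 32 whole pixels below the top edge. */ |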
371 | | |
372 | | /* Set up limit values for vertical motion vector components |
373 | | * to prevent them extending beyond the UMV borders |
374 | | */ |
375 | 328k | x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16)); |
376 | 328k | x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16); |
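 | | /* The (VP8BORDERINPIXELS - 16) slack allows a 16x16 predictor at the |
 | | * motion limit to extend into the extended (UMV) border while still |
 | | * staying inside the padded reference frame. */ |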
377 | | |
378 | | /* Set the mb activity pointer to the start of the row. */ |
379 | 328k | x->mb_activity_ptr = &cpi->mb_activity_map[map_index]; |
380 | | |
381 | | /* for each macroblock col in image */ |
382 | 2.48M | for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) { |
383 | | #if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) |
384 | | *tp = cpi->tok; |
385 | | #endif |
386 | | /* Distance of Mb to the left & right edges, specified in |
387 | | * 1/8th pel units as they are always compared to values |
388 | | * that are in 1/8th pel units |
389 | | */ |
390 | 2.15M | xd->mb_to_left_edge = -((mb_col * 16) << 3); |
391 | 2.15M | xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3; |
392 | | |
393 | | /* Set up limit values for horizontal motion vector components |
394 | | * to prevent them extending beyond the UMV borders |
395 | | */ |
396 | 2.15M | x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16)); |
397 | 2.15M | x->mv_col_max = |
398 | 2.15M | ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16); |
399 | | |
400 | 2.15M | xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset; |
401 | 2.15M | xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset; |
402 | 2.15M | xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset; |
403 | 2.15M | xd->left_available = (mb_col != 0); |
404 | | |
405 | 2.15M | x->rddiv = cpi->RDDIV; |
406 | 2.15M | x->rdmult = cpi->RDMULT; |
407 | | |
408 | | /* Copy current mb to a buffer */ |
409 | 2.15M | vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16); |
410 | | |
411 | 2.15M | #if CONFIG_MULTITHREAD |
412 | 2.15M | if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) != 0) { |
413 | 0 | if (((mb_col - 1) % nsync) == 0) { |
414 | 0 | vpx_atomic_store_release(current_mb_col, mb_col - 1); |
415 | 0 | } |
416 | 0 | |
417 | 0 | if (mb_row && !(mb_col & (nsync - 1))) { |
418 | 0 | vp8_atomic_spin_wait(mb_col, last_row_current_mb_col, nsync); |
419 | 0 | } |
420 | 0 | } |
421 | 2.15M | #endif |
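 | | /* Row-sync protocol for multithreaded encoding: publish this row's |
 | | * progress every nsync MBs, and spin until the row above is at least |
 | | * nsync MBs ahead, because encoding this MB needs the reconstructed |
 | | * above and above-right neighbours. */ |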
422 | | |
423 | 2.15M | if (cpi->oxcf.tuning == VP8_TUNE_SSIM) vp8_activity_masking(cpi, x); |
424 | | |
425 | | /* Is segmentation enabled */ |
426 | | /* MB level adjustment to quantizer */ |
427 | 2.15M | if (xd->segmentation_enabled) { |
428 | | /* Code to set segment id in xd->mbmi.segment_id for current MB |
429 | | * (with range checking) |
430 | | */ |
431 | 0 | if (cpi->segmentation_map[map_index + mb_col] <= 3) { |
432 | 0 | xd->mode_info_context->mbmi.segment_id = |
433 | 0 | cpi->segmentation_map[map_index + mb_col]; |
434 | 0 | } else { |
435 | 0 | xd->mode_info_context->mbmi.segment_id = 0; |
436 | 0 | } |
437 | | |
438 | 0 | vp8cx_mb_init_quantizer(cpi, x, 1); |
439 | 2.15M | } else { |
440 | | /* Set to Segment 0 by default */ |
441 | 2.15M | xd->mode_info_context->mbmi.segment_id = 0; |
442 | 2.15M | } |
443 | | |
444 | 2.15M | x->active_ptr = cpi->active_map + map_index + mb_col; |
445 | | |
446 | 2.15M | if (cm->frame_type == KEY_FRAME) { |
447 | 880k | const int intra_rate_cost = vp8cx_encode_intra_macroblock(cpi, x, tp); |
448 | 880k | if (INT_MAX - *totalrate > intra_rate_cost) |
449 | 880k | *totalrate += intra_rate_cost; |
450 | 0 | else |
451 | 0 | *totalrate = INT_MAX; |
452 | | #ifdef MODE_STATS |
453 | | y_modes[xd->mbmi.mode]++; |
454 | | #endif |
455 | 1.27M | } else { |
456 | 1.27M | const int inter_rate_cost = vp8cx_encode_inter_macroblock( |
457 | 1.27M | cpi, x, tp, recon_yoffset, recon_uvoffset, mb_row, mb_col); |
458 | 1.27M | if (INT_MAX - *totalrate > inter_rate_cost) |
459 | 1.27M | *totalrate += inter_rate_cost; |
460 | 0 | else |
461 | 0 | *totalrate = INT_MAX; |
462 | | |
463 | | #ifdef MODE_STATS |
464 | | inter_y_modes[xd->mbmi.mode]++; |
465 | | |
466 | | if (xd->mbmi.mode == SPLITMV) { |
467 | | int b; |
468 | | |
469 | | for (b = 0; b < xd->mbmi.partition_count; ++b) { |
470 | | inter_b_modes[x->partition->bmi[b].mode]++; |
471 | | } |
472 | | } |
473 | | |
474 | | #endif |
475 | | |
476 | | // Keep track of how many (consecutive) times a block is coded |
477 | | // as ZEROMV_LASTREF, for base layer frames. |
478 | | // Reset to 0 if it's coded as anything else. |
479 | 1.27M | if (cpi->current_layer == 0) { |
480 | 1.27M | if (xd->mode_info_context->mbmi.mode == ZEROMV && |
481 | 1.27M | xd->mode_info_context->mbmi.ref_frame == LAST_FRAME) { |
482 | | // Increment, check for wrap-around. |
483 | 76.1k | if (cpi->consec_zero_last[map_index + mb_col] < 255) { |
484 | 76.1k | cpi->consec_zero_last[map_index + mb_col] += 1; |
485 | 76.1k | } |
486 | 76.1k | if (cpi->consec_zero_last_mvbias[map_index + mb_col] < 255) { |
487 | 76.1k | cpi->consec_zero_last_mvbias[map_index + mb_col] += 1; |
488 | 76.1k | } |
489 | 1.20M | } else { |
490 | 1.20M | cpi->consec_zero_last[map_index + mb_col] = 0; |
491 | 1.20M | cpi->consec_zero_last_mvbias[map_index + mb_col] = 0; |
492 | 1.20M | } |
493 | 1.27M | if (x->zero_last_dot_suppress) { |
494 | 223 | cpi->consec_zero_last_mvbias[map_index + mb_col] = 0; |
495 | 223 | } |
496 | 1.27M | } |
497 | | |
498 | | /* Special case code for cyclic refresh |
499 | | * If cyclic update enabled then copy xd->mbmi.segment_id; (which |
500 | | * may have been updated based on mode during |
501 | | * vp8cx_encode_inter_macroblock()) back into the global |
502 | | * segmentation map |
503 | | */ |
504 | 1.27M | if ((cpi->current_layer == 0) && |
505 | 1.27M | (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled)) { |
506 | 0 | cpi->segmentation_map[map_index + mb_col] = |
507 | 0 | xd->mode_info_context->mbmi.segment_id; |
508 | | |
509 | | /* If the block has been refreshed, mark it as clean (the magnitude |
510 | | * of the negative value influences how long it will be before we |
511 | | * consider another refresh): |
512 | | * else if it was coded (last frame, 0,0) and has not already been |
513 | | * refreshed, mark it as a candidate for cleanup next time (marked |
514 | | * 0); otherwise mark it as dirty (1). |
515 | | */ |
516 | 0 | if (xd->mode_info_context->mbmi.segment_id) { |
517 | 0 | cpi->cyclic_refresh_map[map_index + mb_col] = -1; |
518 | 0 | } else if ((xd->mode_info_context->mbmi.mode == ZEROMV) && |
519 | 0 | (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)) { |
520 | 0 | if (cpi->cyclic_refresh_map[map_index + mb_col] == 1) { |
521 | 0 | cpi->cyclic_refresh_map[map_index + mb_col] = 0; |
522 | 0 | } |
523 | 0 | } else { |
524 | 0 | cpi->cyclic_refresh_map[map_index + mb_col] = 1; |
525 | 0 | } |
526 | 0 | } |
527 | 1.27M | } |
528 | | |
529 | 2.15M | cpi->tplist[mb_row].stop = *tp; |
530 | | |
531 | | #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING |
532 | | /* pack tokens for this MB */ |
533 | | { |
534 | | int tok_count = *tp - tp_start; |
535 | | vp8_pack_tokens(w, tp_start, tok_count); |
536 | | } |
537 | | #endif |
538 | | /* Increment pointer into gf usage flags structure. */ |
539 | 2.15M | x->gf_active_ptr++; |
540 | | |
541 | | /* Increment the activity mask pointers. */ |
542 | 2.15M | x->mb_activity_ptr++; |
543 | | |
544 | | /* adjust to the next column of macroblocks */ |
545 | 2.15M | x->src.y_buffer += 16; |
546 | 2.15M | x->src.u_buffer += 8; |
547 | 2.15M | x->src.v_buffer += 8; |
548 | | |
549 | 2.15M | recon_yoffset += 16; |
550 | 2.15M | recon_uvoffset += 8; |
551 | | |
552 | | /* Keep track of segment usage */ |
553 | 2.15M | segment_counts[xd->mode_info_context->mbmi.segment_id]++; |
554 | | |
555 | | /* skip to next mb */ |
556 | 2.15M | xd->mode_info_context++; |
557 | 2.15M | x->partition_info++; |
558 | 2.15M | xd->above_context++; |
559 | 2.15M | } |
560 | | |
561 | | /* extend the recon for intra prediction */ |
562 | 328k | vp8_extend_mb_row(&cm->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, |
563 | 328k | xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); |
564 | | |
565 | 328k | #if CONFIG_MULTITHREAD |
566 | 328k | if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) != 0) { |
567 | 0 | vpx_atomic_store_release(current_mb_col, |
568 | 0 | vpx_atomic_load_acquire(&rightmost_col)); |
569 | 0 | } |
570 | 328k | #endif |
571 | | |
572 | | /* this is to account for the border */ |
573 | 328k | xd->mode_info_context++; |
574 | 328k | x->partition_info++; |
575 | 328k | } |
576 | | |
577 | 88.0k | static void init_encode_frame_mb_context(VP8_COMP *cpi) { |
578 | 88.0k | MACROBLOCK *const x = &cpi->mb; |
579 | 88.0k | VP8_COMMON *const cm = &cpi->common; |
580 | 88.0k | MACROBLOCKD *const xd = &x->e_mbd; |
581 | | |
582 | | /* GF active flags data structure */ |
583 | 88.0k | x->gf_active_ptr = (signed char *)cpi->gf_active_flags; |
584 | | |
585 | | /* Activity map pointer */ |
586 | 88.0k | x->mb_activity_ptr = cpi->mb_activity_map; |
587 | | |
588 | 88.0k | x->act_zbin_adj = 0; |
589 | | |
590 | 88.0k | x->partition_info = x->pi; |
591 | | |
592 | 88.0k | xd->mode_info_context = cm->mi; |
593 | 88.0k | xd->mode_info_stride = cm->mode_info_stride; |
594 | | |
595 | 88.0k | xd->frame_type = cm->frame_type; |
596 | | |
597 | | /* reset intra mode contexts */ |
598 | 88.0k | if (cm->frame_type == KEY_FRAME) vp8_init_mbmode_probs(cm); |
599 | | |
600 | | /* Copy data over into macro block data structures. */ |
601 | 88.0k | x->src = *cpi->Source; |
602 | 88.0k | xd->pre = cm->yv12_fb[cm->lst_fb_idx]; |
603 | 88.0k | xd->dst = cm->yv12_fb[cm->new_fb_idx]; |
604 | | |
605 | | /* set up frame for intra coded blocks */ |
606 | 88.0k | vp8_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]); |
607 | | |
608 | 88.0k | vp8_build_block_offsets(x); |
609 | | |
610 | 88.0k | xd->mode_info_context->mbmi.mode = DC_PRED; |
611 | 88.0k | xd->mode_info_context->mbmi.uv_mode = DC_PRED; |
612 | | |
613 | 88.0k | xd->left_context = &cm->left_context; |
614 | | |
615 | 88.0k | x->mvc = cm->fc.mvc; |
616 | | |
617 | 88.0k | memset(cm->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols); |
618 | | |
619 | | /* Special case treatment when GF and ARF are not sensible options |
620 | | * for reference |
621 | | */ |
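 | | /* The last two arguments are the probabilities (scaled 1..255) that |
 | | * an inter MB references LAST rather than GF/ARF, and then GF rather |
 | | * than ARF; pinning them to 255 or 1 effectively prices the |
 | | * unavailable reference frames out of the mode decision. */ |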
622 | 88.0k | if (cpi->ref_frame_flags == VP8_LAST_FRAME) { |
623 | 16.8k | vp8_calc_ref_frame_costs(x->ref_frame_cost, cpi->prob_intra_coded, 255, |
624 | 16.8k | 128); |
625 | 71.1k | } else if ((cpi->oxcf.number_of_layers > 1) && |
626 | 71.1k | (cpi->ref_frame_flags == VP8_GOLD_FRAME)) { |
627 | 0 | vp8_calc_ref_frame_costs(x->ref_frame_cost, cpi->prob_intra_coded, 1, 255); |
628 | 71.1k | } else if ((cpi->oxcf.number_of_layers > 1) && |
629 | 71.1k | (cpi->ref_frame_flags == VP8_ALTR_FRAME)) { |
630 | 0 | vp8_calc_ref_frame_costs(x->ref_frame_cost, cpi->prob_intra_coded, 1, 1); |
631 | 71.1k | } else { |
632 | 71.1k | vp8_calc_ref_frame_costs(x->ref_frame_cost, cpi->prob_intra_coded, |
633 | 71.1k | cpi->prob_last_coded, cpi->prob_gf_coded); |
634 | 71.1k | } |
635 | | |
636 | 88.0k | xd->fullpixel_mask = ~0; |
637 | 88.0k | if (cm->full_pixel) xd->fullpixel_mask = ~7; |
638 | | |
639 | 88.0k | vp8_zero(x->coef_counts); |
640 | 88.0k | vp8_zero(x->ymode_count); |
641 | 88.0k | vp8_zero(x->uv_mode_count); |
642 | 88.0k | x->prediction_error = 0; |
643 | 88.0k | x->intra_error = 0; |
644 | 88.0k | vp8_zero(x->count_mb_ref_frame_usage); |
645 | 88.0k | } |
646 | | |
647 | | #if CONFIG_MULTITHREAD |
648 | 0 | static void sum_coef_counts(MACROBLOCK *x, MACROBLOCK *x_thread) { |
649 | 0 | int i = 0; |
650 | 0 | do { |
651 | 0 | int j = 0; |
652 | 0 | do { |
653 | 0 | int k = 0; |
654 | 0 | do { |
655 | | /* at every context */ |
656 | | |
657 | | /* calc probs and branch cts for this frame only */ |
658 | 0 | int t = 0; /* token/prob index */ |
659 | | |
660 | 0 | do { |
661 | 0 | x->coef_counts[i][j][k][t] += x_thread->coef_counts[i][j][k][t]; |
662 | 0 | } while (++t < ENTROPY_NODES); |
663 | 0 | } while (++k < PREV_COEF_CONTEXTS); |
664 | 0 | } while (++j < COEF_BANDS); |
665 | 0 | } while (++i < BLOCK_TYPES); |
666 | 0 | } |
667 | | #endif // CONFIG_MULTITHREAD |
668 | | |
669 | 88.0k | void vp8_encode_frame(VP8_COMP *cpi) { |
670 | 88.0k | int mb_row; |
671 | 88.0k | MACROBLOCK *const x = &cpi->mb; |
672 | 88.0k | VP8_COMMON *const cm = &cpi->common; |
673 | 88.0k | MACROBLOCKD *const xd = &x->e_mbd; |
674 | 88.0k | TOKENEXTRA *tp = cpi->tok; |
675 | 88.0k | int segment_counts[MAX_MB_SEGMENTS]; |
676 | 88.0k | int totalrate; |
677 | | #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING |
678 | | BOOL_CODER *bc = &cpi->bc[1]; /* bc[0] is for control partition */ |
679 | | const int num_part = (1 << cm->multi_token_partition); |
680 | | #endif |
681 | | |
682 | 88.0k | memset(segment_counts, 0, sizeof(segment_counts)); |
683 | 88.0k | totalrate = 0; |
684 | | |
685 | 88.0k | if (cpi->compressor_speed == 2) { |
686 | 32.6k | if (cpi->oxcf.cpu_used < 0) { |
687 | 0 | cpi->Speed = -(cpi->oxcf.cpu_used); |
688 | 32.6k | } else { |
689 | 32.6k | vp8_auto_select_speed(cpi); |
690 | 32.6k | } |
691 | 32.6k | } |
692 | | |
693 | | /* Functions setup for all frame types so we can use MC in AltRef */ |
694 | 88.0k | if (!cm->use_bilinear_mc_filter) { |
695 | 88.0k | xd->subpixel_predict = vp8_sixtap_predict4x4; |
696 | 88.0k | xd->subpixel_predict8x4 = vp8_sixtap_predict8x4; |
697 | 88.0k | xd->subpixel_predict8x8 = vp8_sixtap_predict8x8; |
698 | 88.0k | xd->subpixel_predict16x16 = vp8_sixtap_predict16x16; |
699 | 88.0k | } else { |
700 | 0 | xd->subpixel_predict = vp8_bilinear_predict4x4; |
701 | 0 | xd->subpixel_predict8x4 = vp8_bilinear_predict8x4; |
702 | 0 | xd->subpixel_predict8x8 = vp8_bilinear_predict8x8; |
703 | 0 | xd->subpixel_predict16x16 = vp8_bilinear_predict16x16; |
704 | 0 | } |
705 | | |
706 | 88.0k | cpi->mb.skip_true_count = 0; |
707 | 88.0k | cpi->tok_count = 0; |
708 | | |
709 | | #if 0 |
710 | | /* Experimental code */ |
711 | | cpi->frame_distortion = 0; |
712 | | cpi->last_mb_distortion = 0; |
713 | | #endif |
714 | | |
715 | 88.0k | xd->mode_info_context = cm->mi; |
716 | | |
717 | 88.0k | vp8_zero(cpi->mb.MVcount); |
718 | | |
719 | 88.0k | vp8cx_frame_init_quantizer(cpi); |
720 | | |
721 | 88.0k | vp8_initialize_rd_consts(cpi, x, |
722 | 88.0k | vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q)); |
723 | | |
724 | 88.0k | vp8cx_initialize_me_consts(cpi, cm->base_qindex); |
725 | | |
726 | 88.0k | if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { |
727 | | /* Initialize encode frame context. */ |
728 | 0 | init_encode_frame_mb_context(cpi); |
729 | | |
730 | | /* Build a frame level activity map */ |
731 | 0 | build_activity_map(cpi); |
732 | 0 | } |
733 | | |
734 | | /* re-init encode frame context. */ |
735 | 88.0k | init_encode_frame_mb_context(cpi); |
736 | | |
737 | | #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING |
738 | | { |
739 | | int i; |
740 | | for (i = 0; i < num_part; ++i) { |
741 | | vp8_start_encode(&bc[i], cpi->partition_d[i + 1], |
742 | | cpi->partition_d_end[i + 1]); |
743 | | bc[i].error = &cm->error; |
744 | | } |
745 | | } |
746 | | |
747 | | #endif |
748 | | |
749 | 88.0k | { |
750 | 88.0k | struct vpx_usec_timer emr_timer; |
751 | 88.0k | vpx_usec_timer_start(&emr_timer); |
752 | | |
753 | 88.0k | #if CONFIG_MULTITHREAD |
754 | 88.0k | if (vpx_atomic_load_acquire(&cpi->b_multi_threaded)) { |
755 | 0 | int i; |
756 | | |
757 | 0 | vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, |
758 | 0 | cpi->encoding_thread_count); |
759 | | |
760 | 0 | if (cpi->mt_current_mb_col_size != cm->mb_rows) { |
761 | 0 | vpx_free(cpi->mt_current_mb_col); |
762 | 0 | cpi->mt_current_mb_col = NULL; |
763 | 0 | cpi->mt_current_mb_col_size = 0; |
764 | 0 | CHECK_MEM_ERROR( |
765 | 0 | &cpi->common.error, cpi->mt_current_mb_col, |
766 | 0 | vpx_malloc(sizeof(*cpi->mt_current_mb_col) * cm->mb_rows)); |
767 | 0 | cpi->mt_current_mb_col_size = cm->mb_rows; |
768 | 0 | } |
769 | 0 | for (i = 0; i < cm->mb_rows; ++i) |
770 | 0 | vpx_atomic_store_release(&cpi->mt_current_mb_col[i], -1); |
771 | | |
772 | 0 | for (i = 0; i < cpi->encoding_thread_count; ++i) { |
773 | 0 | vp8_sem_post(&cpi->h_event_start_encoding[i]); |
774 | 0 | } |
775 | | |
776 | 0 | for (mb_row = 0; mb_row < cm->mb_rows; |
777 | 0 | mb_row += (cpi->encoding_thread_count + 1)) { |
778 | 0 | vp8_zero(cm->left_context); |
779 | | |
780 | | #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING |
781 | | tp = cpi->tok; |
782 | | #else |
783 | 0 | tp = cpi->tok + mb_row * (cm->mb_cols * 16 * 24); |
784 | 0 | #endif |
785 | | |
786 | 0 | encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate); |
787 | | |
788 | | /* adjust to the next row of mbs */ |
789 | 0 | x->src.y_buffer += |
790 | 0 | 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - |
791 | 0 | 16 * cm->mb_cols; |
792 | 0 | x->src.u_buffer += |
793 | 0 | 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - |
794 | 0 | 8 * cm->mb_cols; |
795 | 0 | x->src.v_buffer += |
796 | 0 | 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - |
797 | 0 | 8 * cm->mb_cols; |
798 | | |
799 | 0 | xd->mode_info_context += |
800 | 0 | xd->mode_info_stride * cpi->encoding_thread_count; |
801 | 0 | x->partition_info += xd->mode_info_stride * cpi->encoding_thread_count; |
802 | 0 | x->gf_active_ptr += cm->mb_cols * cpi->encoding_thread_count; |
803 | 0 | } |
804 | | /* Wait for all the threads to finish. */ |
805 | 0 | for (i = 0; i < cpi->encoding_thread_count; ++i) { |
806 | 0 | vp8_sem_wait(&cpi->h_event_end_encoding[i]); |
807 | 0 | } |
808 | | |
809 | 0 | for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) { |
810 | 0 | cpi->tok_count += (unsigned int)(cpi->tplist[mb_row].stop - |
811 | 0 | cpi->tplist[mb_row].start); |
812 | 0 | } |
813 | | |
814 | 0 | if (xd->segmentation_enabled) { |
815 | 0 | int j; |
816 | | |
817 | 0 | if (xd->segmentation_enabled) { |
818 | 0 | for (i = 0; i < cpi->encoding_thread_count; ++i) { |
819 | 0 | for (j = 0; j < 4; ++j) { |
820 | 0 | segment_counts[j] += cpi->mb_row_ei[i].segment_counts[j]; |
821 | 0 | } |
822 | 0 | } |
823 | 0 | } |
824 | 0 | } |
825 | | |
826 | 0 | for (i = 0; i < cpi->encoding_thread_count; ++i) { |
827 | 0 | int mode_count; |
828 | 0 | int c_idx; |
829 | 0 | totalrate += cpi->mb_row_ei[i].totalrate; |
830 | | |
831 | 0 | cpi->mb.skip_true_count += cpi->mb_row_ei[i].mb.skip_true_count; |
832 | | |
833 | 0 | for (mode_count = 0; mode_count < VP8_YMODES; ++mode_count) { |
834 | 0 | cpi->mb.ymode_count[mode_count] += |
835 | 0 | cpi->mb_row_ei[i].mb.ymode_count[mode_count]; |
836 | 0 | } |
837 | | |
838 | 0 | for (mode_count = 0; mode_count < VP8_UV_MODES; ++mode_count) { |
839 | 0 | cpi->mb.uv_mode_count[mode_count] += |
840 | 0 | cpi->mb_row_ei[i].mb.uv_mode_count[mode_count]; |
841 | 0 | } |
842 | | |
843 | 0 | for (c_idx = 0; c_idx < MVvals; ++c_idx) { |
844 | 0 | cpi->mb.MVcount[0][c_idx] += cpi->mb_row_ei[i].mb.MVcount[0][c_idx]; |
845 | 0 | cpi->mb.MVcount[1][c_idx] += cpi->mb_row_ei[i].mb.MVcount[1][c_idx]; |
846 | 0 | } |
847 | | |
848 | 0 | cpi->mb.prediction_error += cpi->mb_row_ei[i].mb.prediction_error; |
849 | 0 | cpi->mb.intra_error += cpi->mb_row_ei[i].mb.intra_error; |
850 | | |
851 | 0 | for (c_idx = 0; c_idx < MAX_REF_FRAMES; ++c_idx) { |
852 | 0 | cpi->mb.count_mb_ref_frame_usage[c_idx] += |
853 | 0 | cpi->mb_row_ei[i].mb.count_mb_ref_frame_usage[c_idx]; |
854 | 0 | } |
855 | | |
856 | 0 | for (c_idx = 0; c_idx < MAX_ERROR_BINS; ++c_idx) { |
857 | 0 | cpi->mb.error_bins[c_idx] += cpi->mb_row_ei[i].mb.error_bins[c_idx]; |
858 | 0 | } |
859 | | |
860 | | /* add up counts for each thread */ |
861 | 0 | sum_coef_counts(x, &cpi->mb_row_ei[i].mb); |
862 | 0 | } |
863 | | |
864 | 0 | } else |
865 | 88.0k | #endif // CONFIG_MULTITHREAD |
866 | 88.0k | { |
867 | | |
868 | | /* for each macroblock row in image */ |
869 | 416k | for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) { |
870 | 328k | vp8_zero(cm->left_context); |
871 | | |
872 | | #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING |
873 | | tp = cpi->tok; |
874 | | #endif |
875 | | |
876 | 328k | encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate); |
877 | | |
878 | | /* adjust to the next row of mbs */ |
879 | 328k | x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols; |
880 | 328k | x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; |
881 | 328k | x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; |
882 | 328k | } |
883 | | |
884 | 88.0k | cpi->tok_count = (unsigned int)(tp - cpi->tok); |
885 | 88.0k | } |
886 | | |
887 | | #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING |
888 | | { |
889 | | int i; |
890 | | for (i = 0; i < num_part; ++i) { |
891 | | vp8_stop_encode(&bc[i]); |
892 | | cpi->partition_sz[i + 1] = bc[i].pos; |
893 | | } |
894 | | } |
895 | | #endif |
896 | | |
897 | 88.0k | vpx_usec_timer_mark(&emr_timer); |
898 | 88.0k | cpi->time_encode_mb_row += vpx_usec_timer_elapsed(&emr_timer); |
899 | 88.0k | } |
900 | | |
901 | | // Work out the segment probabilities if segmentation is enabled |
902 | | // and needs to be updated |
903 | 88.0k | if (xd->segmentation_enabled && xd->update_mb_segmentation_map) { |
904 | 0 | int tot_count; |
905 | 0 | int i; |
906 | | |
907 | | /* Set to defaults */ |
908 | 0 | memset(xd->mb_segment_tree_probs, 255, sizeof(xd->mb_segment_tree_probs)); |
909 | | |
910 | 0 | tot_count = segment_counts[0] + segment_counts[1] + segment_counts[2] + |
911 | 0 | segment_counts[3]; |
912 | | |
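 | | /* The four segment ids are coded with a three-node binary tree: |
 | | * probs[0] splits {0, 1} from {2, 3}, probs[1] splits 0 from 1, and |
 | | * probs[2] splits 2 from 3, each scaled to the range 1..255 below. */ |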
913 | 0 | if (tot_count) { |
914 | 0 | xd->mb_segment_tree_probs[0] = |
915 | 0 | ((segment_counts[0] + segment_counts[1]) * 255) / tot_count; |
916 | | |
917 | 0 | tot_count = segment_counts[0] + segment_counts[1]; |
918 | | |
919 | 0 | if (tot_count > 0) { |
920 | 0 | xd->mb_segment_tree_probs[1] = (segment_counts[0] * 255) / tot_count; |
921 | 0 | } |
922 | | |
923 | 0 | tot_count = segment_counts[2] + segment_counts[3]; |
924 | | |
925 | 0 | if (tot_count > 0) { |
926 | 0 | xd->mb_segment_tree_probs[2] = (segment_counts[2] * 255) / tot_count; |
927 | 0 | } |
928 | | |
929 | | /* Zero probabilities not allowed */ |
930 | 0 | for (i = 0; i < MB_FEATURE_TREE_PROBS; ++i) { |
931 | 0 | if (xd->mb_segment_tree_probs[i] == 0) xd->mb_segment_tree_probs[i] = 1; |
932 | 0 | } |
933 | 0 | } |
934 | 0 | } |
935 | | |
936 | | /* projected_frame_size in bits (totalrate is accumulated in 1/256-bit units) */ |
937 | 88.0k | cpi->projected_frame_size = totalrate >> 8; |
938 | | |
939 | | /* Make a note of the percentage MBs coded Intra. */ |
940 | 88.0k | if (cm->frame_type == KEY_FRAME) { |
941 | 18.6k | cpi->this_frame_percent_intra = 100; |
942 | 69.4k | } else { |
943 | 69.4k | int tot_modes; |
944 | | |
945 | 69.4k | tot_modes = cpi->mb.count_mb_ref_frame_usage[INTRA_FRAME] + |
946 | 69.4k | cpi->mb.count_mb_ref_frame_usage[LAST_FRAME] + |
947 | 69.4k | cpi->mb.count_mb_ref_frame_usage[GOLDEN_FRAME] + |
948 | 69.4k | cpi->mb.count_mb_ref_frame_usage[ALTREF_FRAME]; |
949 | | |
950 | 69.4k | if (tot_modes) { |
951 | 69.4k | cpi->this_frame_percent_intra = |
952 | 69.4k | cpi->mb.count_mb_ref_frame_usage[INTRA_FRAME] * 100 / tot_modes; |
953 | 69.4k | } |
954 | 69.4k | } |
955 | | |
956 | 88.0k | #if !CONFIG_REALTIME_ONLY |
957 | | /* Adjust the projected reference frame usage probability numbers to |
958 | | * reflect what we have just seen. This may be useful when we make |
959 | | * multiple iterations of the recode loop rather than continuing to use |
960 | | * values from the previous frame. |
961 | | */ |
962 | 88.0k | if ((cm->frame_type != KEY_FRAME) && |
963 | 88.0k | ((cpi->oxcf.number_of_layers > 1) || |
964 | 69.4k | (!cm->refresh_alt_ref_frame && !cm->refresh_golden_frame))) { |
965 | 62.3k | vp8_convert_rfct_to_prob(cpi); |
966 | 62.3k | } |
967 | 88.0k | #endif |
968 | 88.0k | } |
969 | 4.91k | void vp8_setup_block_ptrs(MACROBLOCK *x) { |
970 | 4.91k | int r, c; |
971 | 4.91k | int i; |
972 | | |
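 | | /* src_diff holds one macroblock of residuals laid out as 256 Y |
 | | * samples, 64 U at offset 256, 64 V at offset 320, and the 16 |
 | | * second-order (Y2) coefficients at offset 384; coeff is likewise |
 | | * 25 blocks of 16. */ |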
973 | 24.5k | for (r = 0; r < 4; ++r) { |
974 | 98.2k | for (c = 0; c < 4; ++c) { |
975 | 78.6k | x->block[r * 4 + c].src_diff = x->src_diff + r * 4 * 16 + c * 4; |
976 | 78.6k | } |
977 | 19.6k | } |
978 | | |
979 | 14.7k | for (r = 0; r < 2; ++r) { |
980 | 29.4k | for (c = 0; c < 2; ++c) { |
981 | 19.6k | x->block[16 + r * 2 + c].src_diff = x->src_diff + 256 + r * 4 * 8 + c * 4; |
982 | 19.6k | } |
983 | 9.82k | } |
984 | | |
985 | 14.7k | for (r = 0; r < 2; ++r) { |
986 | 29.4k | for (c = 0; c < 2; ++c) { |
987 | 19.6k | x->block[20 + r * 2 + c].src_diff = x->src_diff + 320 + r * 4 * 8 + c * 4; |
988 | 19.6k | } |
989 | 9.82k | } |
990 | | |
991 | 4.91k | x->block[24].src_diff = x->src_diff + 384; |
992 | | |
993 | 127k | for (i = 0; i < 25; ++i) { |
994 | 122k | x->block[i].coeff = x->coeff + i * 16; |
995 | 122k | } |
996 | 4.91k | } |
997 | | |
998 | 88.0k | void vp8_build_block_offsets(MACROBLOCK *x) { |
999 | 88.0k | int block = 0; |
1000 | 88.0k | int br, bc; |
1001 | | |
1002 | 88.0k | vp8_build_block_doffsets(&x->e_mbd); |
1003 | | |
1004 | | /* y blocks */ |
1005 | 88.0k | x->thismb_ptr = &x->thismb[0]; |
1006 | 440k | for (br = 0; br < 4; ++br) { |
1007 | 1.76M | for (bc = 0; bc < 4; ++bc) { |
1008 | 1.40M | BLOCK *this_block = &x->block[block]; |
1009 | 1.40M | this_block->base_src = &x->thismb_ptr; |
1010 | 1.40M | this_block->src_stride = 16; |
1011 | 1.40M | this_block->src = 4 * br * 16 + 4 * bc; |
1012 | 1.40M | ++block; |
1013 | 1.40M | } |
1014 | 352k | } |
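 | | /* The Y blocks read from thismb, the contiguous copy of the current |
 | | * MB made in encode_mb_row(); the chroma blocks below read directly |
 | | * from the source frame via src.u_buffer / src.v_buffer. */ |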
1015 | | |
1016 | | /* u blocks */ |
1017 | 264k | for (br = 0; br < 2; ++br) { |
1018 | 528k | for (bc = 0; bc < 2; ++bc) { |
1019 | 352k | BLOCK *this_block = &x->block[block]; |
1020 | 352k | this_block->base_src = &x->src.u_buffer; |
1021 | 352k | this_block->src_stride = x->src.uv_stride; |
1022 | 352k | this_block->src = 4 * br * this_block->src_stride + 4 * bc; |
1023 | 352k | ++block; |
1024 | 352k | } |
1025 | 176k | } |
1026 | | |
1027 | | /* v blocks */ |
1028 | 264k | for (br = 0; br < 2; ++br) { |
1029 | 528k | for (bc = 0; bc < 2; ++bc) { |
1030 | 352k | BLOCK *this_block = &x->block[block]; |
1031 | 352k | this_block->base_src = &x->src.v_buffer; |
1032 | 352k | this_block->src_stride = x->src.uv_stride; |
1033 | 352k | this_block->src = 4 * br * this_block->src_stride + 4 * bc; |
1034 | 352k | ++block; |
1035 | 352k | } |
1036 | 176k | } |
1037 | 88.0k | } |
1038 | | |
1039 | 1.65M | static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x) { |
1040 | 1.65M | const MACROBLOCKD *xd = &x->e_mbd; |
1041 | 1.65M | const MB_PREDICTION_MODE m = xd->mode_info_context->mbmi.mode; |
1042 | 1.65M | const MB_PREDICTION_MODE uvm = xd->mode_info_context->mbmi.uv_mode; |
1043 | | |
1044 | | #ifdef MODE_STATS |
1045 | | const int is_key = cpi->common.frame_type == KEY_FRAME; |
1046 | | |
1047 | | ++(is_key ? uv_modes : inter_uv_modes)[uvm]; |
1048 | | |
1049 | | if (m == B_PRED) { |
1050 | | unsigned int *const bct = is_key ? b_modes : inter_b_modes; |
1051 | | |
1052 | | int b = 0; |
1053 | | |
1054 | | do { |
1055 | | ++bct[xd->block[b].bmi.mode]; |
1056 | | } while (++b < 16); |
1057 | | } |
1058 | | |
1059 | | #else |
1060 | 1.65M | (void)cpi; |
1061 | 1.65M | #endif |
1062 | | |
1063 | 1.65M | ++x->ymode_count[m]; |
1064 | 1.65M | ++x->uv_mode_count[uvm]; |
1065 | 1.65M | } |
1066 | | |
1067 | | /* Experimental stub function to create a per MB zbin adjustment based on |
1068 | | * some previously calculated measure of MB activity. |
1069 | | */ |
1070 | 0 | static void adjust_act_zbin(VP8_COMP *cpi, MACROBLOCK *x) { |
1071 | | #if USE_ACT_INDEX |
1072 | | x->act_zbin_adj = *(x->mb_activity_ptr); |
1073 | | #else |
1074 | 0 | int64_t a; |
1075 | 0 | int64_t b; |
1076 | 0 | int64_t act = *(x->mb_activity_ptr); |
1077 | | |
1078 | | /* Apply the masking to the zero-bin adjustment. */ |
1079 | 0 | a = act + 4 * cpi->activity_avg; |
1080 | 0 | b = 4 * act + cpi->activity_avg; |
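 | | /* Same scheme as vp8_activity_masking() but with a wider 4:1 |
 | | * weighting, so the rounded ratio lies in (1/4, 4); the result is a |
 | | * signed zbin adjustment that is zero when act equals the average. */ |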
1081 | 0 | |
1082 | 0 | if (act > cpi->activity_avg) { |
1083 | 0 | x->act_zbin_adj = (int)(((int64_t)b + (a >> 1)) / a) - 1; |
1084 | 0 | } else { |
1085 | 0 | x->act_zbin_adj = 1 - (int)(((int64_t)a + (b >> 1)) / b); |
1086 | 0 | } |
1087 | 0 | #endif |
1088 | 0 | } |
1089 | | |
1090 | | int vp8cx_encode_intra_macroblock(VP8_COMP *cpi, MACROBLOCK *x, |
1091 | 880k | TOKENEXTRA **t) { |
1092 | 880k | MACROBLOCKD *xd = &x->e_mbd; |
1093 | 880k | int rate; |
1094 | | |
1095 | 880k | if (cpi->sf.RD && cpi->compressor_speed != 2) { |
1096 | 346k | vp8_rd_pick_intra_mode(x, &rate); |
1097 | 534k | } else { |
1098 | 534k | vp8_pick_intra_mode(x, &rate); |
1099 | 534k | } |
1100 | | |
1101 | 880k | if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { |
1102 | 0 | adjust_act_zbin(cpi, x); |
1103 | 0 | vp8_update_zbin_extra(cpi, x); |
1104 | 0 | } |
1105 | | |
1106 | 880k | if (x->e_mbd.mode_info_context->mbmi.mode == B_PRED) { |
1107 | 503k | vp8_encode_intra4x4mby(x); |
1108 | 503k | } else { |
1109 | 376k | vp8_encode_intra16x16mby(x); |
1110 | 376k | } |
1111 | | |
1112 | 880k | vp8_encode_intra16x16mbuv(x); |
1113 | | |
1114 | 880k | sum_intra_stats(cpi, x); |
1115 | | |
1116 | 880k | vp8_tokenize_mb(cpi, x, t); |
1117 | | |
1118 | 880k | if (xd->mode_info_context->mbmi.mode != B_PRED) vp8_inverse_transform_mby(xd); |
1119 | | |
1120 | 880k | vp8_dequant_idct_add_uv_block(xd->qcoeff + 16 * 16, xd->dequant_uv, |
1121 | 880k | xd->dst.u_buffer, xd->dst.v_buffer, |
1122 | 880k | xd->dst.uv_stride, xd->eobs + 16); |
1123 | 880k | return rate; |
1124 | 880k | } |
1125 | | #ifdef SPEEDSTATS |
1126 | | extern int cnt_pm; |
1127 | | #endif |
1128 | | |
1129 | | extern void vp8_fix_contexts(MACROBLOCKD *x); |
1130 | | |
1131 | | int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, |
1132 | | int recon_yoffset, int recon_uvoffset, |
1133 | 1.27M | int mb_row, int mb_col) { |
1134 | 1.27M | MACROBLOCKD *const xd = &x->e_mbd; |
1135 | 1.27M | int intra_error = 0; |
1136 | 1.27M | int rate; |
1137 | 1.27M | int distortion; |
1138 | | |
1139 | 1.27M | x->skip = 0; |
1140 | | |
1141 | 1.27M | if (xd->segmentation_enabled) { |
1142 | 0 | x->encode_breakout = |
1143 | 0 | cpi->segment_encode_breakout[xd->mode_info_context->mbmi.segment_id]; |
1144 | 1.27M | } else { |
1145 | 1.27M | x->encode_breakout = cpi->oxcf.encode_breakout; |
1146 | 1.27M | } |
1147 | | |
1148 | 1.27M | #if CONFIG_TEMPORAL_DENOISING |
1149 | | /* Reset the best sse mode/mv for each macroblock. */ |
1150 | 1.27M | x->best_reference_frame = INTRA_FRAME; |
1151 | 1.27M | x->best_zeromv_reference_frame = INTRA_FRAME; |
1152 | 1.27M | x->best_sse_inter_mode = 0; |
1153 | 1.27M | x->best_sse_mv.as_int = 0; |
1154 | 1.27M | x->need_to_clamp_best_mvs = 0; |
1155 | 1.27M | #endif |
1156 | | |
1157 | 1.27M | if (cpi->sf.RD) { |
1158 | 552k | int zbin_mode_boost_enabled = x->zbin_mode_boost_enabled; |
1159 | | |
1160 | | /* Are we using the fast quantizer for the mode selection? */ |
1161 | 552k | if (cpi->sf.use_fastquant_for_pick) { |
1162 | 552k | x->quantize_b = vp8_fast_quantize_b; |
1163 | | |
1164 | | /* the fast quantizer does not use zbin_extra, so |
1165 | | * do not recalculate */ |
1166 | 552k | x->zbin_mode_boost_enabled = 0; |
1167 | 552k | } |
1168 | 552k | vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, |
1169 | 552k | &distortion, &intra_error, mb_row, mb_col); |
1170 | | |
1171 | | /* switch back to the regular quantizer for the encode */ |
1172 | 552k | if (cpi->sf.improved_quant) { |
1173 | 552k | x->quantize_b = vp8_regular_quantize_b; |
1174 | 552k | } |
1175 | | |
1176 | | /* restore cpi->zbin_mode_boost_enabled */ |
1177 | 552k | x->zbin_mode_boost_enabled = zbin_mode_boost_enabled; |
1178 | | |
1179 | 725k | } else { |
1180 | 725k | vp8_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, |
1181 | 725k | &distortion, &intra_error, mb_row, mb_col); |
1182 | 725k | } |
1183 | | |
1184 | 1.27M | x->prediction_error += distortion; |
1185 | 1.27M | x->intra_error += intra_error; |
1186 | | |
1187 | 1.27M | if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { |
1188 | | /* Adjust the zbin based on this MB rate. */ |
1189 | 0 | adjust_act_zbin(cpi, x); |
1190 | 0 | } |
1191 | | |
1192 | | #if 0 |
1193 | | /* Experimental RD code */ |
1194 | | cpi->frame_distortion += distortion; |
1195 | | cpi->last_mb_distortion = distortion; |
1196 | | #endif |
1197 | | |
1198 | | /* MB level adjustment to quantizer setup */ |
1199 | 1.27M | if (xd->segmentation_enabled) { |
1200 | | /* If cyclic update enabled */ |
1201 | 0 | if (cpi->current_layer == 0 && cpi->cyclic_refresh_mode_enabled) { |
1202 | | /* Clear segment_id back to 0 if not coded (last frame 0,0) */ |
1203 | 0 | if ((xd->mode_info_context->mbmi.segment_id == 1) && |
1204 | 0 | ((xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) || |
1205 | 0 | (xd->mode_info_context->mbmi.mode != ZEROMV))) { |
1206 | 0 | xd->mode_info_context->mbmi.segment_id = 0; |
1207 | | |
1208 | | /* segment_id changed, so update */ |
1209 | 0 | vp8cx_mb_init_quantizer(cpi, x, 1); |
1210 | 0 | } |
1211 | 0 | } |
1212 | 0 | } |
1213 | | |
1214 | 1.27M | { |
1215 | | /* Experimental code. |
1216 | | * Special case for gf and arf zeromv modes, for 1 temporal layer. |
1217 | | * Increase zbin size to suppress noise. |
1218 | | */ |
1219 | 1.27M | x->zbin_mode_boost = 0; |
1220 | 1.27M | if (x->zbin_mode_boost_enabled) { |
1221 | 1.27M | if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME) { |
1222 | 504k | if (xd->mode_info_context->mbmi.mode == ZEROMV) { |
1223 | 87.5k | if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME && |
1224 | 87.5k | cpi->oxcf.number_of_layers == 1) { |
1225 | 11.4k | x->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; |
1226 | 76.1k | } else { |
1227 | 76.1k | x->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; |
1228 | 76.1k | } |
1229 | 417k | } else if (xd->mode_info_context->mbmi.mode == SPLITMV) { |
1230 | 113k | x->zbin_mode_boost = 0; |
1231 | 303k | } else { |
1232 | 303k | x->zbin_mode_boost = MV_ZBIN_BOOST; |
1233 | 303k | } |
1234 | 504k | } |
1235 | 1.27M | } |
1236 | | |
1237 | | /* The fast quantizer doesn't use zbin_extra, only do so with |
1238 | | * the regular quantizer. */ |
1239 | 1.27M | if (cpi->sf.improved_quant) vp8_update_zbin_extra(cpi, x); |
1240 | 1.27M | } |
1241 | | |
1242 | 1.27M | x->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame]++; |
1243 | | |
1244 | 1.27M | if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { |
1245 | 773k | vp8_encode_intra16x16mbuv(x); |
1246 | | |
1247 | 773k | if (xd->mode_info_context->mbmi.mode == B_PRED) { |
1248 | 289k | vp8_encode_intra4x4mby(x); |
1249 | 484k | } else { |
1250 | 484k | vp8_encode_intra16x16mby(x); |
1251 | 484k | } |
1252 | | |
1253 | 773k | sum_intra_stats(cpi, x); |
1254 | 773k | } else { |
1255 | 504k | int ref_fb_idx; |
1256 | | |
1257 | 504k | if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME) { |
1258 | 431k | ref_fb_idx = cpi->common.lst_fb_idx; |
1259 | 431k | } else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME) { |
1260 | 55.9k | ref_fb_idx = cpi->common.gld_fb_idx; |
1261 | 55.9k | } else { |
1262 | 17.6k | ref_fb_idx = cpi->common.alt_fb_idx; |
1263 | 17.6k | } |
1264 | | |
1265 | 504k | xd->pre.y_buffer = cpi->common.yv12_fb[ref_fb_idx].y_buffer + recon_yoffset; |
1266 | 504k | xd->pre.u_buffer = |
1267 | 504k | cpi->common.yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset; |
1268 | 504k | xd->pre.v_buffer = |
1269 | 504k | cpi->common.yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset; |
1270 | | |
1271 | 504k | if (!x->skip) { |
1272 | 504k | vp8_encode_inter16x16(x); |
1273 | 504k | } else { |
1274 | 0 | vp8_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer, xd->dst.u_buffer, |
1275 | 0 | xd->dst.v_buffer, xd->dst.y_stride, |
1276 | 0 | xd->dst.uv_stride); |
1277 | 0 | } |
1278 | 504k | } |
1279 | | |
1280 | 1.27M | if (!x->skip) { |
1281 | 1.27M | vp8_tokenize_mb(cpi, x, t); |
1282 | | |
1283 | 1.27M | if (xd->mode_info_context->mbmi.mode != B_PRED) { |
1284 | 988k | vp8_inverse_transform_mby(xd); |
1285 | 988k | } |
1286 | | |
1287 | 1.27M | vp8_dequant_idct_add_uv_block(xd->qcoeff + 16 * 16, xd->dequant_uv, |
1288 | 1.27M | xd->dst.u_buffer, xd->dst.v_buffer, |
1289 | 1.27M | xd->dst.uv_stride, xd->eobs + 16); |
1290 | 1.27M | } else { |
1291 | | /* always set mb_skip_coeff as it is needed by the loopfilter */ |
1292 | 0 | xd->mode_info_context->mbmi.mb_skip_coeff = 1; |
1293 | | |
1294 | 0 | if (cpi->common.mb_no_coeff_skip) { |
1295 | 0 | x->skip_true_count++; |
1296 | 0 | vp8_fix_contexts(xd); |
1297 | 0 | } else { |
1298 | 0 | vp8_stuff_mb(cpi, x, t); |
1299 | 0 | } |
1300 | 0 | } |
1301 | | |
1302 | 1.27M | return rate; |
1303 | 1.27M | } |