/src/libvpx/vp9/encoder/vp9_encodemb.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 | | * |
4 | | * Use of this source code is governed by a BSD-style license |
5 | | * that can be found in the LICENSE file in the root of the source |
6 | | * tree. An additional intellectual property rights grant can be found |
7 | | * in the file PATENTS. All contributing project authors may |
8 | | * be found in the AUTHORS file in the root of the source tree. |
9 | | */ |
10 | | |
11 | | #include "./vp9_rtcd.h" |
12 | | #include "./vpx_config.h" |
13 | | #include "./vpx_dsp_rtcd.h" |
14 | | |
15 | | #include "vpx_dsp/quantize.h" |
16 | | #include "vpx_mem/vpx_mem.h" |
17 | | #include "vpx_ports/mem.h" |
18 | | |
19 | | #if CONFIG_MISMATCH_DEBUG |
20 | | #include "vpx_util/vpx_debug_util.h" |
21 | | #endif |
22 | | |
23 | | #include "vp9/common/vp9_idct.h" |
24 | | #include "vp9/common/vp9_reconinter.h" |
25 | | #include "vp9/common/vp9_reconintra.h" |
26 | | #include "vp9/common/vp9_scan.h" |
27 | | |
28 | | #include "vp9/encoder/vp9_encodemb.h" |
29 | | #include "vp9/encoder/vp9_encoder.h" |
30 | | #include "vp9/encoder/vp9_rd.h" |
31 | | #include "vp9/encoder/vp9_tokenize.h" |
32 | | |
// Pair of per-plane entropy-context arrays, 16 entries per plane.
// NOTE(review): presumably `ta` holds the above and `tl` the left contexts,
// mirroring the ta/tl fields that encode_block reads from its args - confirm
// against the header that declares ENTROPY_CONTEXT.
33 | | struct optimize_ctx { |
34 | | ENTROPY_CONTEXT ta[MAX_MB_PLANE][16]; |
35 | | ENTROPY_CONTEXT tl[MAX_MB_PLANE][16]; |
36 | | }; |
37 | | |
// Fills x->plane[plane].src_diff for a block of size `bsize`.
//   x     - macroblock; its plane[plane] and e_mbd.plane[plane] are used.
//   bsize - block size, converted to the plane's block size first.
//   plane - plane index.
// The block dimensions are 4 * the 4x4-unit width/height of the plane block
// size, and `bw` doubles as the stride of src_diff. The subtract kernel is
// handed the source buffer (p->src) and pd->dst with their strides. When the
// current frame buffer carries YV12_FLAG_HIGHBITDEPTH (and the build enables
// CONFIG_VP9_HIGHBITDEPTH) the high-bitdepth kernel is used with x->e_mbd.bd.
38 | 41.7M | void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { |
39 | 41.7M | struct macroblock_plane *const p = &x->plane[plane]; |
40 | 41.7M | const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane]; |
41 | 41.7M | const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); |
42 | 41.7M | const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; |
43 | 41.7M | const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize]; |
44 | | |
45 | 41.7M | #if CONFIG_VP9_HIGHBITDEPTH |
46 | 41.7M | if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
47 | 0 | vpx_highbd_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, |
48 | 0 | p->src.stride, pd->dst.buf, pd->dst.stride, |
49 | 0 | x->e_mbd.bd); |
50 | 0 | return; |
51 | 0 | } |
52 | 41.7M | #endif // CONFIG_VP9_HIGHBITDEPTH |
53 | 41.7M | vpx_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride, |
54 | 41.7M | pd->dst.buf, pd->dst.stride); |
55 | 41.7M | } |
56 | | |
// Multipliers applied to mb->rdmult inside vp9_optimize_b. The first index is
// the result of is_inter_block() for the current mode info, the second is the
// plane type returned by get_plane_type().
57 | | static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = { |
58 | | { 10, 6 }, |
59 | | { 8, 5 }, |
60 | | }; |
61 | | |
// Right shift that is symmetric around zero: a negative 'num' is negated,
// shifted, and negated again, so the magnitude is what gets shifted.
// Both arguments are evaluated more than once; pass side-effect-free
// expressions only.
62 | | // 'num' can be negative, but 'shift' must be non-negative. |
63 | | #define RIGHT_SHIFT_POSSIBLY_NEGATIVE(num, shift) \ |
64 | 728k | (((num) >= 0) ? (num) >> (shift) : -((-(num)) >> (shift))) |
65 | | |
// Greedy rate-distortion optimization of the quantized coefficients of one
// transform block.
//   mb      - macroblock; plane[plane].coeff/qcoeff/eobs and the matching
//             e_mbd plane's dqcoeff/dequant are used.
//   plane   - plane index.
//   block   - block index used with BLOCK_OFFSET() and eobs[].
//   tx_size - transform size; selects scan order, band table and token costs.
//   ctx     - entropy context used for the first coefficient.
// Walks the coefficients in scan order up to the current eob. For every
// non-zero coefficient it compares keeping the value against shrinking its
// magnitude by one (using RDCOST) and tracks the cheapest end-of-block
// position seen so far. qcoeff/dqcoeff are modified in place; the new eob is
// stored in mb->plane[plane].eobs[block] and is also the return value.
66 | | int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size, |
67 | 15.3M | int ctx) { |
68 | 15.3M | MACROBLOCKD *const xd = &mb->e_mbd; |
69 | 15.3M | struct macroblock_plane *const p = &mb->plane[plane]; |
70 | 15.3M | struct macroblockd_plane *const pd = &xd->plane[plane]; |
71 | 15.3M | const int ref = is_inter_block(xd->mi[0]); |
72 | 15.3M | uint8_t token_cache[1024]; |
73 | 15.3M | const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); |
74 | 15.3M | tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); |
75 | 15.3M | tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); |
76 | 15.3M | const int eob = p->eobs[block]; |
77 | 15.3M | const PLANE_TYPE plane_type = get_plane_type(plane); |
78 | 15.3M | const int default_eob = 16 << (tx_size << 1); |
79 | 15.3M | const int shift = (tx_size == TX_32X32); |
80 | 15.3M | const int16_t *const dequant_ptr = pd->dequant; |
81 | 15.3M | const uint8_t *const band_translate = get_band_translate(tx_size); |
82 | 15.3M | const ScanOrder *const so = get_scan(xd, tx_size, plane_type, block); |
83 | 15.3M | const int16_t *const scan = so->scan; |
84 | 15.3M | const int16_t *const nb = so->neighbors; |
85 | 15.3M | const MODE_INFO *mbmi = xd->mi[0]; |
86 | 15.3M | const int sharpness = mb->sharpness; |
87 | 15.3M | const int64_t rdadj = (int64_t)mb->rdmult * plane_rd_mult[ref][plane_type]; |
88 | 15.3M | const int64_t rdmult = |
89 | 15.3M | (sharpness == 0 ? rdadj >> 1 |
90 | 15.3M | : (rdadj * (8 - sharpness + mbmi->segment_id)) >> 4); |
91 | | |
92 | 15.3M | const int64_t rddiv = mb->rddiv; |
93 | 15.3M | int64_t rd_cost0, rd_cost1; |
94 | 15.3M | int64_t rate0, rate1; |
95 | 15.3M | int16_t t0, t1; |
96 | 15.3M | int i, final_eob; |
97 | 15.3M | int count_high_values_after_eob = 0; |
98 | 15.3M | #if CONFIG_VP9_HIGHBITDEPTH |
99 | 15.3M | const uint16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd); |
100 | | #else |
101 | | const uint16_t *cat6_high_cost = vp9_get_high_cost_table(8); |
102 | | #endif |
103 | 15.3M | unsigned int(*const token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] = |
104 | 15.3M | mb->token_costs[tx_size][plane_type][ref]; |
105 | 15.3M | unsigned int(*token_costs_cur)[2][COEFF_CONTEXTS][ENTROPY_TOKENS]; |
106 | 15.3M | int64_t eob_cost0, eob_cost1; |
107 | 15.3M | const int ctx0 = ctx; |
108 | 15.3M | int64_t accu_rate = 0; |
109 | | // Initialized to the worst possible error for the largest transform size. |
110 | | // This ensures that it never goes negative. |
111 | 15.3M | int64_t accu_error = ((int64_t)1) << 50; |
112 | 15.3M | int64_t best_block_rd_cost = INT64_MAX; |
113 | 15.3M | int x_prev = 1; |
114 | 15.3M | tran_low_t before_best_eob_qc = 0; |
115 | 15.3M | tran_low_t before_best_eob_dqc = 0; |
116 | | |
117 | 15.3M | assert((!plane_type && !plane) || (plane_type && plane)); |
118 | 15.3M | assert(eob <= default_eob); |
119 | | |
// Seed token_cache with the energy class of every coefficient below eob; the
// get_coef_context() calls in the main loop read it.
120 | 312M | for (i = 0; i < eob; i++) { |
121 | 297M | const int rc = scan[i]; |
122 | 297M | token_cache[rc] = vp9_pt_energy_class[vp9_get_token(qcoeff[rc])]; |
123 | 297M | } |
124 | 15.3M | final_eob = 0; |
125 | | |
126 | | // Initial RD cost. |
127 | 15.3M | token_costs_cur = token_costs + band_translate[0]; |
128 | 15.3M | rate0 = (*token_costs_cur)[0][ctx0][EOB_TOKEN]; |
129 | 15.3M | best_block_rd_cost = RDCOST(rdmult, rddiv, rate0, accu_error); |
130 | | |
131 | | // For each token, pick one of two choices greedily: |
132 | | // (i) First candidate: Keep current quantized value, OR |
133 | | // (ii) Second candidate: Reduce quantized value by 1. |
134 | 312M | for (i = 0; i < eob; i++) { |
135 | 297M | const int rc = scan[i]; |
136 | 297M | const int x = qcoeff[rc]; |
137 | 297M | const int band_cur = band_translate[i]; |
138 | 297M | const int ctx_cur = (i == 0) ? ctx : get_coef_context(nb, token_cache, i); |
139 | 297M | const int token_tree_sel_cur = (x_prev == 0); |
140 | 297M | token_costs_cur = token_costs + band_cur; |
141 | 297M | if (x == 0) { // No need to search |
142 | 115M | const int token = vp9_get_token(x); |
143 | 115M | rate0 = (*token_costs_cur)[token_tree_sel_cur][ctx_cur][token]; |
144 | 115M | accu_rate += rate0; |
145 | 115M | x_prev = 0; |
146 | | // Note: accu_error does not change. |
147 | 181M | } else { |
148 | 181M | const int dqv = dequant_ptr[rc != 0]; |
149 | | // Compute the distortion for quantizing to 0. |
150 | 181M | const int diff_for_zero_raw = (0 - coeff[rc]) * (1 << shift); |
151 | 181M | const int diff_for_zero = |
152 | 181M | #if CONFIG_VP9_HIGHBITDEPTH |
153 | 181M | (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) |
154 | 181M | ? RIGHT_SHIFT_POSSIBLY_NEGATIVE(diff_for_zero_raw, xd->bd - 8) |
155 | 181M | : |
156 | 181M | #endif |
157 | 181M | diff_for_zero_raw; |
158 | 181M | const int64_t distortion_for_zero = |
159 | 181M | (int64_t)diff_for_zero * diff_for_zero; |
160 | | |
161 | | // Compute the distortion for the first candidate |
162 | 181M | const int diff0_raw = (dqcoeff[rc] - coeff[rc]) * (1 << shift); |
163 | 181M | const int diff0 = |
164 | 181M | #if CONFIG_VP9_HIGHBITDEPTH |
165 | 181M | (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) |
166 | 181M | ? RIGHT_SHIFT_POSSIBLY_NEGATIVE(diff0_raw, xd->bd - 8) |
167 | 181M | : |
168 | 181M | #endif // CONFIG_VP9_HIGHBITDEPTH |
169 | 181M | diff0_raw; |
170 | 181M | const int64_t distortion0 = (int64_t)diff0 * diff0; |
171 | | |
172 | | // Compute the distortion for the second candidate |
173 | 181M | const int sign = -(x < 0); // -1 if x is negative and 0 otherwise. |
174 | 181M | const int x1 = x - 2 * sign - 1; // abs(x1) = abs(x) - 1. |
175 | 181M | int64_t distortion1; |
176 | 181M | if (x1 != 0) { |
177 | 120M | const int dqv_step = |
178 | 120M | #if CONFIG_VP9_HIGHBITDEPTH |
179 | 120M | (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? dqv >> (xd->bd - 8) |
180 | 120M | : |
181 | 120M | #endif // CONFIG_VP9_HIGHBITDEPTH |
182 | 120M | dqv; |
183 | 120M | const int diff_step = (dqv_step + sign) ^ sign; |
184 | 120M | const int diff1 = diff0 - diff_step; |
185 | 120M | assert(dqv > 0); // We aren't right shifting a negative number above. |
186 | 120M | distortion1 = (int64_t)diff1 * diff1; |
187 | 120M | } else { |
188 | 60.8M | distortion1 = distortion_for_zero; |
189 | 60.8M | } |
190 | 181M | { |
191 | | // Calculate RDCost for current coeff for the two candidates. |
192 | 181M | const int64_t base_bits0 = vp9_get_token_cost(x, &t0, cat6_high_cost); |
193 | 181M | const int64_t base_bits1 = vp9_get_token_cost(x1, &t1, cat6_high_cost); |
194 | 181M | rate0 = |
195 | 181M | base_bits0 + (*token_costs_cur)[token_tree_sel_cur][ctx_cur][t0]; |
196 | 181M | rate1 = |
197 | 181M | base_bits1 + (*token_costs_cur)[token_tree_sel_cur][ctx_cur][t1]; |
198 | 181M | } |
199 | 181M | { |
200 | 181M | int rdcost_better_for_x1, eob_rdcost_better_for_x1; |
201 | 181M | int dqc0, dqc1; |
202 | 181M | int64_t best_eob_cost_cur; |
203 | 181M | int use_x1; |
204 | | |
205 | | // Calculate RD Cost effect on the next coeff for the two candidates. |
206 | 181M | int64_t next_bits0 = 0; |
207 | 181M | int64_t next_bits1 = 0; |
208 | 181M | int64_t next_eob_bits0 = 0; |
209 | 181M | int64_t next_eob_bits1 = 0; |
210 | 181M | if (i < default_eob - 1) { |
211 | 176M | int ctx_next, token_tree_sel_next; |
212 | 176M | const int band_next = band_translate[i + 1]; |
213 | 176M | const int token_next = |
214 | 176M | (i + 1 != eob) ? vp9_get_token(qcoeff[scan[i + 1]]) : EOB_TOKEN; |
215 | 176M | unsigned int(*const token_costs_next)[2][COEFF_CONTEXTS] |
216 | 176M | [ENTROPY_TOKENS] = |
217 | 176M | token_costs + band_next; |
218 | 176M | token_cache[rc] = vp9_pt_energy_class[t0]; |
219 | 176M | ctx_next = get_coef_context(nb, token_cache, i + 1); |
220 | 176M | token_tree_sel_next = (x == 0); |
221 | 176M | next_bits0 = |
222 | 176M | (*token_costs_next)[token_tree_sel_next][ctx_next][token_next]; |
223 | 176M | next_eob_bits0 = |
224 | 176M | (*token_costs_next)[token_tree_sel_next][ctx_next][EOB_TOKEN]; |
225 | 176M | token_cache[rc] = vp9_pt_energy_class[t1]; |
226 | 176M | ctx_next = get_coef_context(nb, token_cache, i + 1); |
227 | 176M | token_tree_sel_next = (x1 == 0); |
228 | 176M | next_bits1 = |
229 | 176M | (*token_costs_next)[token_tree_sel_next][ctx_next][token_next]; |
230 | 176M | if (x1 != 0) { |
231 | 117M | next_eob_bits1 = |
232 | 117M | (*token_costs_next)[token_tree_sel_next][ctx_next][EOB_TOKEN]; |
233 | 117M | } |
234 | 176M | } |
235 | | |
236 | | // Compare the total RD costs for two candidates. |
237 | 181M | rd_cost0 = RDCOST(rdmult, rddiv, (rate0 + next_bits0), distortion0); |
238 | 181M | rd_cost1 = RDCOST(rdmult, rddiv, (rate1 + next_bits1), distortion1); |
239 | 181M | rdcost_better_for_x1 = (rd_cost1 < rd_cost0); |
240 | 181M | eob_cost0 = RDCOST(rdmult, rddiv, (accu_rate + rate0 + next_eob_bits0), |
241 | 181M | (accu_error + distortion0 - distortion_for_zero)); |
242 | 181M | eob_cost1 = eob_cost0; |
243 | 181M | if (x1 != 0) { |
244 | 120M | eob_cost1 = |
245 | 120M | RDCOST(rdmult, rddiv, (accu_rate + rate1 + next_eob_bits1), |
246 | 120M | (accu_error + distortion1 - distortion_for_zero)); |
247 | 120M | eob_rdcost_better_for_x1 = (eob_cost1 < eob_cost0); |
248 | 120M | } else { |
249 | 60.8M | eob_rdcost_better_for_x1 = 0; |
250 | 60.8M | } |
251 | | |
252 | | // Calculate the two candidate de-quantized values. |
253 | 181M | dqc0 = dqcoeff[rc]; |
254 | 181M | dqc1 = 0; |
255 | 181M | if (rdcost_better_for_x1 + eob_rdcost_better_for_x1) { |
256 | 1.86M | if (x1 != 0) { |
257 | 728k | dqc1 = RIGHT_SHIFT_POSSIBLY_NEGATIVE(x1 * dqv, shift); |
258 | 1.14M | } else { |
259 | 1.14M | dqc1 = 0; |
260 | 1.14M | } |
261 | 1.86M | } |
262 | | |
263 | | // Pick and record the better quantized and de-quantized values. |
264 | 181M | if (rdcost_better_for_x1) { |
265 | 1.78M | qcoeff[rc] = x1; |
266 | 1.78M | dqcoeff[rc] = dqc1; |
267 | 1.78M | accu_rate += rate1; |
268 | 1.78M | accu_error += distortion1 - distortion_for_zero; |
269 | 1.78M | assert(distortion1 <= distortion_for_zero); |
270 | 1.78M | token_cache[rc] = vp9_pt_energy_class[t1]; |
271 | 179M | } else { |
272 | 179M | accu_rate += rate0; |
273 | 179M | accu_error += distortion0 - distortion_for_zero; |
274 | 179M | assert(distortion0 <= distortion_for_zero); |
275 | 179M | token_cache[rc] = vp9_pt_energy_class[t0]; |
276 | 179M | } |
277 | 181M | if (sharpness > 0 && abs(qcoeff[rc]) > 1) count_high_values_after_eob++; |
278 | 181M | assert(accu_error >= 0); |
279 | 181M | x_prev = qcoeff[rc]; // Update based on selected quantized value. |
280 | | |
281 | 181M | use_x1 = (x1 != 0) && eob_rdcost_better_for_x1; |
282 | 181M | best_eob_cost_cur = use_x1 ? eob_cost1 : eob_cost0; |
283 | | |
284 | | // Determine whether to move the eob position to i+1 |
285 | 181M | if (best_eob_cost_cur < best_block_rd_cost) { |
286 | 173M | best_block_rd_cost = best_eob_cost_cur; |
287 | 173M | final_eob = i + 1; |
288 | 173M | count_high_values_after_eob = 0; |
289 | 173M | if (use_x1) { |
290 | 722k | before_best_eob_qc = x1; |
291 | 722k | before_best_eob_dqc = dqc1; |
292 | 172M | } else { |
293 | 172M | before_best_eob_qc = x; |
294 | 172M | before_best_eob_dqc = dqc0; |
295 | 172M | } |
296 | 173M | } |
297 | 181M | } |
298 | 181M | } |
299 | 297M | } |
// The counter below is only incremented when sharpness > 0 and a kept
// coefficient has magnitude > 1, and it is reset whenever the best eob moves.
// If it is non-zero the block is not truncated at the best eob: eob becomes
// one past the last non-zero coefficient in scan order.
300 | 15.3M | if (count_high_values_after_eob > 0) { |
301 | 0 | final_eob = eob - 1; |
302 | 0 | for (; final_eob >= 0; final_eob--) { |
303 | 0 | const int rc = scan[final_eob]; |
304 | 0 | const int x = qcoeff[rc]; |
305 | 0 | if (x) { |
306 | 0 | break; |
307 | 0 | } |
308 | 0 | } |
309 | 0 | final_eob++; |
310 | 15.3M | } else { |
// Otherwise commit the value that was chosen for the coefficient just before
// the best eob, then zero everything from final_eob up to the old eob.
311 | 15.3M | assert(final_eob <= eob); |
312 | 15.3M | if (final_eob > 0) { |
313 | 8.81M | int rc; |
314 | 8.81M | assert(before_best_eob_qc != 0); |
315 | 8.81M | i = final_eob - 1; |
316 | 8.81M | rc = scan[i]; |
317 | 8.81M | qcoeff[rc] = before_best_eob_qc; |
318 | 8.81M | dqcoeff[rc] = before_best_eob_dqc; |
319 | 8.81M | } |
320 | 34.6M | for (i = final_eob; i < eob; i++) { |
321 | 19.2M | int rc = scan[i]; |
322 | 19.2M | qcoeff[rc] = 0; |
323 | 19.2M | dqcoeff[rc] = 0; |
324 | 19.2M | } |
325 | 15.3M | } |
326 | 15.3M | mb->plane[plane].eobs[block] = final_eob; |
327 | 15.3M | return final_eob; |
328 | 15.3M | } |
329 | | #undef RIGHT_SHIFT_POSSIBLY_NEGATIVE |
330 | | |
// Runs a 32x32 forward DCT from `src` (stride `src_stride`) into `dst`.
// A non-zero `rd_transform` selects vpx_fdct32x32_rd; zero selects
// vpx_fdct32x32.
331 | | static INLINE void fdct32x32(int rd_transform, const int16_t *src, |
332 | 3.70M | tran_low_t *dst, int src_stride) { |
333 | 3.70M | if (rd_transform) |
334 | 3.58M | vpx_fdct32x32_rd(src, dst, src_stride); |
335 | 119k | else |
336 | 119k | vpx_fdct32x32(src, dst, src_stride); |
337 | 3.70M | } |
338 | | |
// High-bitdepth counterpart of fdct32x32, compiled only when
// CONFIG_VP9_HIGHBITDEPTH is set. A non-zero `rd_transform` selects
// vpx_highbd_fdct32x32_rd; zero selects vpx_highbd_fdct32x32.
339 | | #if CONFIG_VP9_HIGHBITDEPTH |
340 | | static INLINE void highbd_fdct32x32(int rd_transform, const int16_t *src, |
341 | 0 | tran_low_t *dst, int src_stride) { |
342 | 0 | if (rd_transform) |
343 | 0 | vpx_highbd_fdct32x32_rd(src, dst, src_stride); |
344 | 0 | else |
345 | 0 | vpx_highbd_fdct32x32(src, dst, src_stride); |
346 | 0 | } |
347 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
348 | | |
// Forward-transforms one transform block of the residual and quantizes it
// with the vp9_(highbd_)quantize_fp* routines.
//   x           - macroblock; plane[plane] and e_mbd.plane[plane] are used.
//   plane/block - select the coeff/qcoeff/dqcoeff offsets and eobs[] slot.
//   row/col     - block position; the residual is read from
//                 src_diff[4 * (row * diff_stride + col)].
//   plane_bsize - gives diff_stride (4 * its width in 4x4 units).
//   tx_size     - selects the transform, the coefficient count (1024/256/64/16)
//                 and the entry of vp9_default_scan_orders used.
// coeff, qcoeff, dqcoeff and &eobs[block] are handed to the transform and
// quantizer as outputs. The caller must already have handled x->skip_block
// (asserted below). The 4x4 case goes through x->fwd_txfm4x4 in both the
// regular and the high-bitdepth branch.
349 | | void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col, |
350 | 0 | BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { |
351 | 0 | MACROBLOCKD *const xd = &x->e_mbd; |
352 | 0 | const struct macroblock_plane *const p = &x->plane[plane]; |
353 | 0 | const struct macroblockd_plane *const pd = &xd->plane[plane]; |
354 | 0 | const ScanOrder *const scan_order = &vp9_default_scan_orders[tx_size]; |
355 | 0 | tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); |
356 | 0 | tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); |
357 | 0 | tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); |
358 | 0 | uint16_t *const eob = &p->eobs[block]; |
359 | 0 | const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; |
360 | 0 | const int16_t *src_diff; |
361 | 0 | src_diff = &p->src_diff[4 * (row * diff_stride + col)]; |
362 | | // skip block condition should be handled before this is called. |
363 | 0 | assert(!x->skip_block); |
364 |

365 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
366 | 0 | if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
367 | 0 | switch (tx_size) { |
368 | 0 | case TX_32X32: |
369 | 0 | highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); |
370 | 0 | vp9_highbd_quantize_fp_32x32(coeff, 1024, p, qcoeff, dqcoeff, |
371 | 0 | pd->dequant, eob, scan_order); |
372 | 0 | break; |
373 | 0 | case TX_16X16: |
374 | 0 | vpx_highbd_fdct16x16(src_diff, coeff, diff_stride); |
375 | 0 | vp9_highbd_quantize_fp(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, eob, |
376 | 0 | scan_order); |
377 | 0 | break; |
378 | 0 | case TX_8X8: |
379 | 0 | vpx_highbd_fdct8x8(src_diff, coeff, diff_stride); |
380 | 0 | vp9_highbd_quantize_fp(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob, |
381 | 0 | scan_order); |
382 | 0 | break; |
383 | 0 | default: |
384 | 0 | assert(tx_size == TX_4X4); |
385 | 0 | x->fwd_txfm4x4(src_diff, coeff, diff_stride); |
386 | 0 | vp9_highbd_quantize_fp(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob, |
387 | 0 | scan_order); |
388 | 0 | break; |
389 | 0 | } |
390 | 0 | return; |
391 | 0 | } |
392 | 0 | #endif // CONFIG_VP9_HIGHBITDEPTH |
393 | | |
394 | 0 | switch (tx_size) { |
395 | 0 | case TX_32X32: |
396 | 0 | fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); |
397 | 0 | vp9_quantize_fp_32x32(coeff, 1024, p, qcoeff, dqcoeff, pd->dequant, eob, |
398 | 0 | scan_order); |
399 | 0 | break; |
400 | 0 | case TX_16X16: |
401 | 0 | vpx_fdct16x16(src_diff, coeff, diff_stride); |
402 | 0 | vp9_quantize_fp(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, eob, |
403 | 0 | scan_order); |
404 | 0 | break; |
405 | 0 | case TX_8X8: |
406 | 0 | vpx_fdct8x8(src_diff, coeff, diff_stride); |
407 | 0 | vp9_quantize_fp(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob, |
408 | 0 | scan_order); |
409 |

410 | 0 | break; |
411 | 0 | default: |
412 | 0 | assert(tx_size == TX_4X4); |
413 | 0 | x->fwd_txfm4x4(src_diff, coeff, diff_stride); |
414 | 0 | vp9_quantize_fp(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob, |
415 | 0 | scan_order); |
416 | 0 | break; |
417 | 0 | } |
418 | 0 | } |
419 | | |
// Transform + quantize variant that uses the `_1` forward-transform routines
// (vpx_fdct32x32_1 / 16x16_1 / 8x8_1) and the vpx_(highbd_)quantize_dc*
// quantizers. The 4x4 case goes through x->fwd_txfm4x4 instead of a `_1`
// routine.
//   x           - macroblock; plane[plane] and e_mbd.plane[plane] are used.
//   plane/block - select the coeff/qcoeff/dqcoeff offsets and eobs[] slot.
//   row/col     - block position; the residual is read from
//                 src_diff[4 * (row * diff_stride + col)].
//   plane_bsize - gives diff_stride (4 * its width in 4x4 units).
//   tx_size     - selects the transform and coefficient count.
// The quantizer receives p->round, p->quant_fp[0] and pd->dequant[0]; no scan
// order is involved. The caller must already have handled x->skip_block
// (asserted below).
420 | | void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col, |
421 | 301k | BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { |
422 | 301k | MACROBLOCKD *const xd = &x->e_mbd; |
423 | 301k | const struct macroblock_plane *const p = &x->plane[plane]; |
424 | 301k | const struct macroblockd_plane *const pd = &xd->plane[plane]; |
425 | 301k | tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); |
426 | 301k | tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); |
427 | 301k | tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); |
428 | 301k | uint16_t *const eob = &p->eobs[block]; |
429 | 301k | const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; |
430 | 301k | const int16_t *src_diff; |
431 | 301k | src_diff = &p->src_diff[4 * (row * diff_stride + col)]; |
432 | | // skip block condition should be handled before this is called. |
433 | 301k | assert(!x->skip_block); |
434 | | |
435 | 301k | #if CONFIG_VP9_HIGHBITDEPTH |
436 | 301k | if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
437 | 0 | switch (tx_size) { |
438 | 0 | case TX_32X32: |
439 | 0 | vpx_highbd_fdct32x32_1(src_diff, coeff, diff_stride); |
440 | 0 | vpx_highbd_quantize_dc_32x32(coeff, p->round, p->quant_fp[0], qcoeff, |
441 | 0 | dqcoeff, pd->dequant[0], eob); |
442 | 0 | break; |
443 | 0 | case TX_16X16: |
444 | 0 | vpx_highbd_fdct16x16_1(src_diff, coeff, diff_stride); |
445 | 0 | vpx_highbd_quantize_dc(coeff, 256, p->round, p->quant_fp[0], qcoeff, |
446 | 0 | dqcoeff, pd->dequant[0], eob); |
447 | 0 | break; |
448 | 0 | case TX_8X8: |
449 | 0 | vpx_highbd_fdct8x8_1(src_diff, coeff, diff_stride); |
450 | 0 | vpx_highbd_quantize_dc(coeff, 64, p->round, p->quant_fp[0], qcoeff, |
451 | 0 | dqcoeff, pd->dequant[0], eob); |
452 | 0 | break; |
453 | 0 | default: |
454 | 0 | assert(tx_size == TX_4X4); |
455 | 0 | x->fwd_txfm4x4(src_diff, coeff, diff_stride); |
456 | 0 | vpx_highbd_quantize_dc(coeff, 16, p->round, p->quant_fp[0], qcoeff, |
457 | 0 | dqcoeff, pd->dequant[0], eob); |
458 | 0 | break; |
459 | 0 | } |
460 | 0 | return; |
461 | 0 | } |
462 | 301k | #endif // CONFIG_VP9_HIGHBITDEPTH |
463 | | |
464 | 301k | switch (tx_size) { |
465 | 2.09k | case TX_32X32: |
466 | 2.09k | vpx_fdct32x32_1(src_diff, coeff, diff_stride); |
467 | 2.09k | vpx_quantize_dc_32x32(coeff, p->round, p->quant_fp[0], qcoeff, dqcoeff, |
468 | 2.09k | pd->dequant[0], eob); |
469 | 2.09k | break; |
470 | 9.27k | case TX_16X16: |
471 | 9.27k | vpx_fdct16x16_1(src_diff, coeff, diff_stride); |
472 | 9.27k | vpx_quantize_dc(coeff, 256, p->round, p->quant_fp[0], qcoeff, dqcoeff, |
473 | 9.27k | pd->dequant[0], eob); |
474 | 9.27k | break; |
475 | 50.1k | case TX_8X8: |
476 | 50.1k | vpx_fdct8x8_1(src_diff, coeff, diff_stride); |
477 | 50.1k | vpx_quantize_dc(coeff, 64, p->round, p->quant_fp[0], qcoeff, dqcoeff, |
478 | 50.1k | pd->dequant[0], eob); |
479 | 50.1k | break; |
480 | 240k | default: |
481 | 240k | assert(tx_size == TX_4X4); |
482 | 240k | x->fwd_txfm4x4(src_diff, coeff, diff_stride); |
483 | 240k | vpx_quantize_dc(coeff, 16, p->round, p->quant_fp[0], qcoeff, dqcoeff, |
484 | 240k | pd->dequant[0], eob); |
485 | 240k | break; |
486 | 301k | } |
487 | 301k | } |
488 | | |
// Full forward transform followed by the vpx_(highbd_)quantize_b* quantizers
// for one transform block.
//   x           - macroblock; plane[plane] and e_mbd.plane[plane] are used.
//   plane/block - select the coeff/qcoeff/dqcoeff offsets and eobs[] slot.
//   row/col     - block position; the residual is read from
//                 src_diff[4 * (row * diff_stride + col)].
//   plane_bsize - gives diff_stride (4 * its width in 4x4 units).
//   tx_size     - selects the transform, the coefficient count and the entry
//                 of vp9_default_scan_orders handed to the quantizer.
// The 32x32 case picks its transform via x->use_lp32x32fdct; the 4x4 case goes
// through x->fwd_txfm4x4. The caller must already have handled x->skip_block
// (asserted below).
489 | | void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col, |
490 | 68.2M | BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { |
491 | 68.2M | MACROBLOCKD *const xd = &x->e_mbd; |
492 | 68.2M | const struct macroblock_plane *const p = &x->plane[plane]; |
493 | 68.2M | const struct macroblockd_plane *const pd = &xd->plane[plane]; |
494 | 68.2M | const ScanOrder *const scan_order = &vp9_default_scan_orders[tx_size]; |
495 | 68.2M | tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); |
496 | 68.2M | tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); |
497 | 68.2M | tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); |
498 | 68.2M | uint16_t *const eob = &p->eobs[block]; |
499 | 68.2M | const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; |
500 | 68.2M | const int16_t *src_diff; |
501 | 68.2M | src_diff = &p->src_diff[4 * (row * diff_stride + col)]; |
502 | | // skip block condition should be handled before this is called. |
503 | 68.2M | assert(!x->skip_block); |
504 | | |
505 | 68.2M | #if CONFIG_VP9_HIGHBITDEPTH |
506 | 68.2M | if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
507 | 0 | switch (tx_size) { |
508 | 0 | case TX_32X32: |
509 | 0 | highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); |
510 | 0 | vpx_highbd_quantize_b_32x32(coeff, p, qcoeff, dqcoeff, pd->dequant, eob, |
511 | 0 | scan_order); |
512 | 0 | break; |
513 | 0 | case TX_16X16: |
514 | 0 | vpx_highbd_fdct16x16(src_diff, coeff, diff_stride); |
515 | 0 | vpx_highbd_quantize_b(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, eob, |
516 | 0 | scan_order); |
517 | 0 | break; |
518 | 0 | case TX_8X8: |
519 | 0 | vpx_highbd_fdct8x8(src_diff, coeff, diff_stride); |
520 | 0 | vpx_highbd_quantize_b(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob, |
521 | 0 | scan_order); |
522 | 0 | break; |
523 | 0 | default: |
524 | 0 | assert(tx_size == TX_4X4); |
525 | 0 | x->fwd_txfm4x4(src_diff, coeff, diff_stride); |
526 | 0 | vpx_highbd_quantize_b(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob, |
527 | 0 | scan_order); |
528 | 0 | break; |
529 | 0 | } |
530 | 0 | return; |
531 | 0 | } |
532 | 68.2M | #endif // CONFIG_VP9_HIGHBITDEPTH |
533 | | |
534 | 68.2M | switch (tx_size) { |
535 | 1.03M | case TX_32X32: |
536 | 1.03M | fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); |
537 | 1.03M | vpx_quantize_b_32x32(coeff, p, qcoeff, dqcoeff, pd->dequant, eob, |
538 | 1.03M | scan_order); |
539 | 1.03M | break; |
540 | 4.25M | case TX_16X16: |
541 | 4.25M | vpx_fdct16x16(src_diff, coeff, diff_stride); |
542 | 4.25M | vpx_quantize_b(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, eob, |
543 | 4.25M | scan_order); |
544 | 4.25M | break; |
545 | 17.6M | case TX_8X8: |
546 | 17.6M | vpx_fdct8x8(src_diff, coeff, diff_stride); |
547 | 17.6M | vpx_quantize_b(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob, |
548 | 17.6M | scan_order); |
549 | 17.6M | break; |
550 | 45.2M | default: |
551 | 45.2M | assert(tx_size == TX_4X4); |
552 | 45.2M | x->fwd_txfm4x4(src_diff, coeff, diff_stride); |
553 | 45.2M | vpx_quantize_b(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob, |
554 | 45.2M | scan_order); |
555 | 45.2M | break; |
556 | 68.2M | } |
557 | 68.2M | } |
558 | | |
// Encodes one transform block; `arg` is a struct encode_b_args *.
// NOTE(review): presumably registered as a per-block visitor in the same way
// encode_block_pass1 is - the call site is not visible here.
// Steps, in order:
//   1. Luma blocks flagged in x->zcoeff_blk get eob = 0 and cleared contexts.
//   2. Unless x->skip_recode is set, the block is transformed and quantized
//      (or skipped) according to x->quant_fp and the x->skip_txfm[] entry.
//   3. The ta/tl context entries are set from the eob, after vp9_optimize_b
//      when x->optimize allows it.
//   4. *args->skip is cleared if the block has a non-zero eob.
//   5. Unless x->skip_encode is set or eob is 0, the matching *_idct*_add /
//      inv_txfm_add routine is called on dqcoeff and the destination pointer.
// With CONFIG_MISMATCH_DEBUG every early exit jumps to encode_block_end so
// the block is still passed to mismatch_record_block_tx when output_enabled
// is set; without it the early exits simply return.
559 | | static void encode_block(int plane, int block, int row, int col, |
560 | 10.9M | BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { |
561 | 10.9M | struct encode_b_args *const args = arg; |
562 | | #if CONFIG_MISMATCH_DEBUG |
563 | | int mi_row = args->mi_row; |
564 | | int mi_col = args->mi_col; |
565 | | int output_enabled = args->output_enabled; |
566 | | #endif |
567 | 10.9M | MACROBLOCK *const x = args->x; |
568 | 10.9M | MACROBLOCKD *const xd = &x->e_mbd; |
569 | 10.9M | struct macroblock_plane *const p = &x->plane[plane]; |
570 | 10.9M | struct macroblockd_plane *const pd = &xd->plane[plane]; |
571 | 10.9M | tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); |
572 | 10.9M | uint8_t *dst; |
573 | 10.9M | ENTROPY_CONTEXT *a, *l; |
574 | 10.9M | dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col]; |
575 | 10.9M | a = &args->ta[col]; |
576 | 10.9M | l = &args->tl[row]; |
577 | | |
578 | | // TODO(jingning): per transformed block zero forcing only enabled for |
579 | | // luma component. will integrate chroma components as well. |
580 | 10.9M | if (x->zcoeff_blk[tx_size][block] && plane == 0) { |
581 | 3.59M | p->eobs[block] = 0; |
582 | 3.59M | *a = *l = 0; |
583 | | #if CONFIG_MISMATCH_DEBUG |
584 | | goto encode_block_end; |
585 | | #else |
586 | 3.59M | return; |
587 | 3.59M | #endif |
588 | 3.59M | } |
589 | | |
590 | 7.36M | if (!x->skip_recode) { |
591 | 7.36M | if (x->quant_fp) { |
592 | | // Encoding process for rtc mode |
593 | 0 | if (x->skip_txfm[0] == SKIP_TXFM_AC_DC && plane == 0) { |
594 | | // skip forward transform |
595 | 0 | p->eobs[block] = 0; |
596 | 0 | *a = *l = 0; |
597 | | #if CONFIG_MISMATCH_DEBUG |
598 | | goto encode_block_end; |
599 | | #else |
600 | 0 | return; |
601 | 0 | #endif |
602 | 0 | } else { |
603 | 0 | vp9_xform_quant_fp(x, plane, block, row, col, plane_bsize, tx_size); |
604 | 0 | } |
605 | 7.36M | } else { |
// The per-block skip_txfm[] entry is only consulted when the transform size
// equals the largest one allowed for this plane block size.
606 | 7.36M | if (max_txsize_lookup[plane_bsize] == tx_size) { |
607 | 4.58M | int txfm_blk_index = (plane << 2) + (block >> (tx_size << 1)); |
608 | 4.58M | if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_NONE) { |
609 | | // full forward transform and quantization |
610 | 4.58M | vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size); |
611 | 4.58M | } else if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_AC_ONLY) { |
612 | | // fast path forward transform and quantization |
613 | 0 | vp9_xform_quant_dc(x, plane, block, row, col, plane_bsize, tx_size); |
614 | 0 | } else { |
615 | | // skip forward transform |
616 | 0 | p->eobs[block] = 0; |
617 | 0 | *a = *l = 0; |
618 | | #if CONFIG_MISMATCH_DEBUG |
619 | | goto encode_block_end; |
620 | | #else |
621 | 0 | return; |
622 | 0 | #endif |
623 | 0 | } |
624 | 4.58M | } else { |
625 | 2.77M | vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size); |
626 | 2.77M | } |
627 | 7.36M | } |
628 | 7.36M | } |
629 | | |
630 | 7.36M | if (x->optimize && (!x->skip_recode || !x->skip_optimize)) { |
631 | 0 | const int ctx = combine_entropy_contexts(*a, *l); |
632 | 0 | *a = *l = vp9_optimize_b(x, plane, block, tx_size, ctx) > 0; |
633 | 7.36M | } else { |
634 | 7.36M | *a = *l = p->eobs[block] > 0; |
635 | 7.36M | } |
636 | | |
637 | 7.36M | if (p->eobs[block]) *(args->skip) = 0; |
638 | | |
639 | 7.36M | if (x->skip_encode || p->eobs[block] == 0) { |
640 | | #if CONFIG_MISMATCH_DEBUG |
641 | | goto encode_block_end; |
642 | | #else |
643 | 379k | return; |
644 | 379k | #endif |
645 | 379k | } |
646 | 6.98M | #if CONFIG_VP9_HIGHBITDEPTH |
647 | 6.98M | if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
648 | 0 | uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst); |
649 | 0 | switch (tx_size) { |
650 | 0 | case TX_32X32: |
651 | 0 | vp9_highbd_idct32x32_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block], |
652 | 0 | xd->bd); |
653 | 0 | break; |
654 | 0 | case TX_16X16: |
655 | 0 | vp9_highbd_idct16x16_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block], |
656 | 0 | xd->bd); |
657 | 0 | break; |
658 | 0 | case TX_8X8: |
659 | 0 | vp9_highbd_idct8x8_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block], |
660 | 0 | xd->bd); |
661 | 0 | break; |
662 | 0 | default: |
663 | 0 | assert(tx_size == TX_4X4); |
664 | | // this is like vp9_short_idct4x4 but has a special case around eob<=1 |
665 | | // which is significant (not just an optimization) for the lossless |
666 | | // case. |
667 | 0 | x->highbd_inv_txfm_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block], |
668 | 0 | xd->bd); |
669 | 0 | break; |
670 | 0 | } |
671 | | #if CONFIG_MISMATCH_DEBUG |
672 | | goto encode_block_end; |
673 | | #else |
674 | 0 | return; |
675 | 0 | #endif |
676 | 0 | } |
677 | 6.98M | #endif // CONFIG_VP9_HIGHBITDEPTH |
678 | | |
679 | 6.98M | switch (tx_size) { |
680 | 7.59k | case TX_32X32: |
681 | 7.59k | vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); |
682 | 7.59k | break; |
683 | 52.4k | case TX_16X16: |
684 | 52.4k | vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); |
685 | 52.4k | break; |
686 | 362k | case TX_8X8: |
687 | 362k | vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); |
688 | 362k | break; |
689 | 6.55M | default: |
690 | 6.55M | assert(tx_size == TX_4X4); |
691 | | // this is like vp9_short_idct4x4 but has a special case around eob<=1 |
692 | | // which is significant (not just an optimization) for the lossless |
693 | | // case. |
694 | 6.55M | x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); |
695 | 6.55M | break; |
696 | 6.98M | } |
697 | | #if CONFIG_MISMATCH_DEBUG |
698 | | encode_block_end: |
699 | | if (output_enabled) { |
700 | | int pixel_c, pixel_r; |
701 | | int blk_w = 1 << (tx_size + TX_UNIT_SIZE_LOG2); |
702 | | int blk_h = 1 << (tx_size + TX_UNIT_SIZE_LOG2); |
703 | | mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, col, row, |
704 | | pd->subsampling_x, pd->subsampling_y); |
705 | | mismatch_record_block_tx(dst, pd->dst.stride, plane, pixel_c, pixel_r, |
706 | | blk_w, blk_h, |
707 | | xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH); |
708 | | } |
709 | | #endif |
710 | 6.98M | } |
711 | | |
// Per-transform-block visitor passed to
// vp9_foreach_transformed_block_in_plane by vp9_encode_sby_pass1; `arg` is the
// MACROBLOCK *.
// Always runs vp9_xform_quant on the block. If the resulting eob is non-zero
// it then calls x->inv_txfm_add (or x->highbd_inv_txfm_add for high-bitdepth
// frame buffers) on dqcoeff and the block's position in pd->dst, regardless of
// tx_size.
712 | | static void encode_block_pass1(int plane, int block, int row, int col, |
713 | | BLOCK_SIZE plane_bsize, TX_SIZE tx_size, |
714 | 0 | void *arg) { |
715 | 0 | MACROBLOCK *const x = (MACROBLOCK *)arg; |
716 | 0 | MACROBLOCKD *const xd = &x->e_mbd; |
717 | 0 | struct macroblock_plane *const p = &x->plane[plane]; |
718 | 0 | struct macroblockd_plane *const pd = &xd->plane[plane]; |
719 | 0 | tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); |
720 | 0 | uint8_t *dst; |
721 | 0 | dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col]; |
722 |

723 | 0 | vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size); |
724 |

725 | 0 | if (p->eobs[block] > 0) { |
726 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
727 | 0 | if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
728 | 0 | x->highbd_inv_txfm_add(dqcoeff, CONVERT_TO_SHORTPTR(dst), pd->dst.stride, |
729 | 0 | p->eobs[block], xd->bd); |
730 | 0 | return; |
731 | 0 | } |
732 | 0 | #endif // CONFIG_VP9_HIGHBITDEPTH |
733 | 0 | x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); |
734 | 0 | } |
735 | 0 | } |
736 | | |
737 | 0 | void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) { |
738 | 0 | vp9_subtract_plane(x, bsize, 0); |
739 | 0 | vp9_foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0, |
740 | 0 | encode_block_pass1, x); |
741 | 0 | } |
742 | | |
743 | | void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col, |
744 | 2.29M | int output_enabled) { |
745 | 2.29M | MACROBLOCKD *const xd = &x->e_mbd; |
746 | 2.29M | struct optimize_ctx ctx; |
747 | 2.29M | MODE_INFO *mi = xd->mi[0]; |
748 | 2.29M | int plane; |
749 | | #if CONFIG_MISMATCH_DEBUG |
750 | | struct encode_b_args arg = { x, |
751 | | 1, // enable_trellis_opt |
752 | | 0.0, // trellis_opt_thresh |
753 | | NULL, // &sse_calc_done |
754 | | NULL, // &sse |
755 | | NULL, // above entropy context |
756 | | NULL, // left entropy context |
757 | | &mi->skip, mi_row, mi_col, output_enabled }; |
758 | | #else |
759 | 2.29M | struct encode_b_args arg = { x, |
760 | 2.29M | 1, // enable_trellis_opt |
761 | 2.29M | 0.0, // trellis_opt_thresh |
762 | 2.29M | NULL, // &sse_calc_done |
763 | 2.29M | NULL, // &sse |
764 | 2.29M | NULL, // above entropy context |
765 | 2.29M | NULL, // left entropy context |
766 | 2.29M | &mi->skip }; |
767 | 2.29M | (void)mi_row; |
768 | 2.29M | (void)mi_col; |
769 | 2.29M | (void)output_enabled; |
770 | 2.29M | #endif |
771 | | |
772 | 2.29M | mi->skip = 1; |
773 | | |
774 | 2.29M | if (x->skip) return; |
775 | | |
776 | 7.89M | for (plane = 0; plane < MAX_MB_PLANE; ++plane) { |
777 | 5.91M | if (!x->skip_recode) vp9_subtract_plane(x, bsize, plane); |
778 | | |
779 | 5.91M | if (x->optimize && (!x->skip_recode || !x->skip_optimize)) { |
780 | 0 | const struct macroblockd_plane *const pd = &xd->plane[plane]; |
781 | 0 | const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd) : mi->tx_size; |
782 | 0 | vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], |
783 | 0 | ctx.tl[plane]); |
784 | 0 | arg.enable_trellis_opt = 1; |
785 | 5.91M | } else { |
786 | 5.91M | arg.enable_trellis_opt = 0; |
787 | 5.91M | } |
788 | 5.91M | arg.ta = ctx.ta[plane]; |
789 | 5.91M | arg.tl = ctx.tl[plane]; |
790 | | |
791 | 5.91M | vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block, |
792 | 5.91M | &arg); |
793 | 5.91M | } |
794 | 1.97M | } |
795 | | |
796 | | void vp9_encode_block_intra(int plane, int block, int row, int col, |
797 | | BLOCK_SIZE plane_bsize, TX_SIZE tx_size, |
798 | 296M | void *arg) { |
799 | 296M | struct encode_b_args *const args = arg; |
800 | 296M | MACROBLOCK *const x = args->x; |
801 | 296M | MACROBLOCKD *const xd = &x->e_mbd; |
802 | 296M | MODE_INFO *mi = xd->mi[0]; |
803 | 296M | struct macroblock_plane *const p = &x->plane[plane]; |
804 | 296M | struct macroblockd_plane *const pd = &xd->plane[plane]; |
805 | 296M | tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block); |
806 | 296M | tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block); |
807 | 296M | tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); |
808 | 296M | const ScanOrder *scan_order; |
809 | 296M | TX_TYPE tx_type = DCT_DCT; |
810 | 296M | PREDICTION_MODE mode; |
811 | 296M | const int bwl = b_width_log2_lookup[plane_bsize]; |
812 | 296M | const int diff_stride = 4 * (1 << bwl); |
813 | 296M | uint8_t *src, *dst; |
814 | 296M | int16_t *src_diff; |
815 | 296M | uint16_t *eob = &p->eobs[block]; |
816 | 296M | const int src_stride = p->src.stride; |
817 | 296M | const int dst_stride = pd->dst.stride; |
818 | 296M | int enable_trellis_opt = !x->skip_recode; |
819 | 296M | ENTROPY_CONTEXT *a = NULL; |
820 | 296M | ENTROPY_CONTEXT *l = NULL; |
821 | 296M | int entropy_ctx = 0; |
822 | 296M | dst = &pd->dst.buf[4 * (row * dst_stride + col)]; |
823 | 296M | src = &p->src.buf[4 * (row * src_stride + col)]; |
824 | 296M | src_diff = &p->src_diff[4 * (row * diff_stride + col)]; |
825 | | |
826 | 296M | if (tx_size == TX_4X4) { |
827 | 226M | tx_type = get_tx_type_4x4(get_plane_type(plane), xd, block); |
828 | 226M | scan_order = &vp9_scan_orders[TX_4X4][tx_type]; |
829 | 226M | mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mi->uv_mode; |
830 | 226M | } else { |
831 | 69.9M | mode = plane == 0 ? mi->mode : mi->uv_mode; |
832 | 69.9M | if (tx_size == TX_32X32) { |
833 | 2.67M | scan_order = &vp9_default_scan_orders[TX_32X32]; |
834 | 67.3M | } else { |
835 | 67.3M | tx_type = get_tx_type(get_plane_type(plane), xd); |
836 | 67.3M | scan_order = &vp9_scan_orders[tx_size][tx_type]; |
837 | 67.3M | } |
838 | 69.9M | } |
839 | | |
840 | 296M | vp9_predict_intra_block( |
841 | 296M | xd, bwl, tx_size, mode, (x->skip_encode || x->fp_src_pred) ? src : dst, |
842 | 296M | (x->skip_encode || x->fp_src_pred) ? src_stride : dst_stride, dst, |
843 | 296M | dst_stride, col, row, plane); |
844 | | |
845 | | // skip block condition should be handled before this is called. |
846 | 296M | assert(!x->skip_block); |
847 | | |
848 | 296M | if (!x->skip_recode) { |
849 | 296M | const int tx_size_in_pixels = (1 << tx_size) << 2; |
850 | 296M | #if CONFIG_VP9_HIGHBITDEPTH |
851 | 296M | if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
852 | 0 | vpx_highbd_subtract_block(tx_size_in_pixels, tx_size_in_pixels, src_diff, |
853 | 0 | diff_stride, src, src_stride, dst, dst_stride, |
854 | 0 | xd->bd); |
855 | 296M | } else { |
856 | 296M | vpx_subtract_block(tx_size_in_pixels, tx_size_in_pixels, src_diff, |
857 | 296M | diff_stride, src, src_stride, dst, dst_stride); |
858 | 296M | } |
859 | | #else |
860 | | vpx_subtract_block(tx_size_in_pixels, tx_size_in_pixels, src_diff, |
861 | | diff_stride, src, src_stride, dst, dst_stride); |
862 | | #endif |
863 | 296M | enable_trellis_opt = do_trellis_opt(pd, src_diff, diff_stride, row, col, |
864 | 296M | plane_bsize, tx_size, args); |
865 | 296M | } |
866 | | |
867 | 296M | if (enable_trellis_opt) { |
868 | 12.6M | a = &args->ta[col]; |
869 | 12.6M | l = &args->tl[row]; |
870 | 12.6M | entropy_ctx = combine_entropy_contexts(*a, *l); |
871 | 12.6M | } |
872 | | |
873 | 296M | #if CONFIG_VP9_HIGHBITDEPTH |
874 | 296M | if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
875 | 0 | uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst); |
876 | 0 | switch (tx_size) { |
877 | 0 | case TX_32X32: |
878 | 0 | if (!x->skip_recode) { |
879 | 0 | highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); |
880 | 0 | vpx_highbd_quantize_b_32x32(coeff, p, qcoeff, dqcoeff, pd->dequant, |
881 | 0 | eob, scan_order); |
882 | 0 | } |
883 | 0 | if (enable_trellis_opt) { |
884 | 0 | *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0; |
885 | 0 | } |
886 | 0 | if (!x->skip_encode && *eob) { |
887 | 0 | vp9_highbd_idct32x32_add(dqcoeff, dst16, dst_stride, *eob, xd->bd); |
888 | 0 | } |
889 | 0 | break; |
890 | 0 | case TX_16X16: |
891 | 0 | if (!x->skip_recode) { |
892 | 0 | if (tx_type == DCT_DCT) |
893 | 0 | vpx_highbd_fdct16x16(src_diff, coeff, diff_stride); |
894 | 0 | else |
895 | 0 | vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type); |
896 | 0 | vpx_highbd_quantize_b(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, |
897 | 0 | eob, scan_order); |
898 | 0 | } |
899 | 0 | if (enable_trellis_opt) { |
900 | 0 | *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0; |
901 | 0 | } |
902 | 0 | if (!x->skip_encode && *eob) { |
903 | 0 | vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst16, dst_stride, *eob, |
904 | 0 | xd->bd); |
905 | 0 | } |
906 | 0 | break; |
907 | 0 | case TX_8X8: |
908 | 0 | if (!x->skip_recode) { |
909 | 0 | if (tx_type == DCT_DCT) |
910 | 0 | vpx_highbd_fdct8x8(src_diff, coeff, diff_stride); |
911 | 0 | else |
912 | 0 | vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type); |
913 | 0 | vpx_highbd_quantize_b(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob, |
914 | 0 | scan_order); |
915 | 0 | } |
916 | 0 | if (enable_trellis_opt) { |
917 | 0 | *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0; |
918 | 0 | } |
919 | 0 | if (!x->skip_encode && *eob) { |
920 | 0 | vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst16, dst_stride, *eob, |
921 | 0 | xd->bd); |
922 | 0 | } |
923 | 0 | break; |
924 | 0 | default: |
925 | 0 | assert(tx_size == TX_4X4); |
926 | 0 | if (!x->skip_recode) { |
927 | 0 | if (tx_type != DCT_DCT) |
928 | 0 | vp9_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type); |
929 | 0 | else |
930 | 0 | x->fwd_txfm4x4(src_diff, coeff, diff_stride); |
931 | 0 | vpx_highbd_quantize_b(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob, |
932 | 0 | scan_order); |
933 | 0 | } |
934 | 0 | if (enable_trellis_opt) { |
935 | 0 | *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0; |
936 | 0 | } |
937 | 0 | if (!x->skip_encode && *eob) { |
938 | 0 | if (tx_type == DCT_DCT) { |
939 | | // this is like vp9_short_idct4x4 but has a special case around |
940 | | // eob<=1 which is significant (not just an optimization) for the |
941 | | // lossless case. |
942 | 0 | x->highbd_inv_txfm_add(dqcoeff, dst16, dst_stride, *eob, xd->bd); |
943 | 0 | } else { |
944 | 0 | vp9_highbd_iht4x4_16_add(dqcoeff, dst16, dst_stride, tx_type, |
945 | 0 | xd->bd); |
946 | 0 | } |
947 | 0 | } |
948 | 0 | break; |
949 | 0 | } |
950 | 0 | if (*eob) *(args->skip) = 0; |
951 | 0 | return; |
952 | 0 | } |
953 | 296M | #endif // CONFIG_VP9_HIGHBITDEPTH |
954 | | |
955 | 296M | switch (tx_size) { |
956 | 2.67M | case TX_32X32: |
957 | 2.67M | if (!x->skip_recode) { |
958 | 2.67M | fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); |
959 | 2.67M | vpx_quantize_b_32x32(coeff, p, qcoeff, dqcoeff, pd->dequant, eob, |
960 | 2.67M | scan_order); |
961 | 2.67M | } |
962 | 2.67M | if (enable_trellis_opt) { |
963 | 206k | *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0; |
964 | 206k | } |
965 | 2.67M | if (!x->skip_encode && *eob) |
966 | 2.10M | vp9_idct32x32_add(dqcoeff, dst, dst_stride, *eob); |
967 | 2.67M | break; |
968 | 11.3M | case TX_16X16: |
969 | 11.3M | if (!x->skip_recode) { |
970 | 11.3M | vp9_fht16x16(src_diff, coeff, diff_stride, tx_type); |
971 | 11.3M | vpx_quantize_b(coeff, 256, p, qcoeff, dqcoeff, pd->dequant, eob, |
972 | 11.3M | scan_order); |
973 | 11.3M | } |
974 | 11.3M | if (enable_trellis_opt) { |
975 | 447k | *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0; |
976 | 447k | } |
977 | 11.3M | if (!x->skip_encode && *eob) |
978 | 9.93M | vp9_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob); |
979 | 11.3M | break; |
980 | 55.9M | case TX_8X8: |
981 | 55.9M | if (!x->skip_recode) { |
982 | 55.9M | vp9_fht8x8(src_diff, coeff, diff_stride, tx_type); |
983 | 55.9M | vpx_quantize_b(coeff, 64, p, qcoeff, dqcoeff, pd->dequant, eob, |
984 | 55.9M | scan_order); |
985 | 55.9M | } |
986 | 55.9M | if (enable_trellis_opt) { |
987 | 2.17M | *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0; |
988 | 2.17M | } |
989 | 55.9M | if (!x->skip_encode && *eob) |
990 | 48.1M | vp9_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob); |
991 | 55.9M | break; |
992 | 226M | default: |
993 | 226M | assert(tx_size == TX_4X4); |
994 | 226M | if (!x->skip_recode) { |
995 | 226M | if (tx_type != DCT_DCT) |
996 | 22.3M | vp9_fht4x4(src_diff, coeff, diff_stride, tx_type); |
997 | 203M | else |
998 | 203M | x->fwd_txfm4x4(src_diff, coeff, diff_stride); |
999 | 226M | vpx_quantize_b(coeff, 16, p, qcoeff, dqcoeff, pd->dequant, eob, |
1000 | 226M | scan_order); |
1001 | 226M | } |
1002 | 226M | if (enable_trellis_opt) { |
1003 | 9.83M | *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0; |
1004 | 9.83M | } |
1005 | 226M | if (!x->skip_encode && *eob) { |
1006 | 179M | if (tx_type == DCT_DCT) |
1007 | | // this is like vp9_short_idct4x4 but has a special case around eob<=1 |
1008 | | // which is significant (not just an optimization) for the lossless |
1009 | | // case. |
1010 | 160M | x->inv_txfm_add(dqcoeff, dst, dst_stride, *eob); |
1011 | 19.0M | else |
1012 | 19.0M | vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type); |
1013 | 179M | } |
1014 | 226M | break; |
1015 | 296M | } |
1016 | 296M | if (*eob) *(args->skip) = 0; |
1017 | 296M | } |
1018 | | |
1019 | | void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane, |
1020 | 15.7M | int enable_trellis_opt) { |
1021 | 15.7M | const MACROBLOCKD *const xd = &x->e_mbd; |
1022 | 15.7M | struct optimize_ctx ctx; |
1023 | | #if CONFIG_MISMATCH_DEBUG |
1024 | | // TODO(angiebird): make mismatch_debug support intra mode |
1025 | | struct encode_b_args arg = { |
1026 | | x, |
1027 | | enable_trellis_opt, |
1028 | | 0.0, // trellis_opt_thresh |
1029 | | NULL, // &sse_calc_done |
1030 | | NULL, // &sse |
1031 | | ctx.ta[plane], |
1032 | | ctx.tl[plane], |
1033 | | &xd->mi[0]->skip, |
1034 | | 0, // mi_row |
1035 | | 0, // mi_col |
1036 | | 0 // output_enabled |
1037 | | }; |
1038 | | #else |
1039 | 15.7M | struct encode_b_args arg = { x, |
1040 | 15.7M | enable_trellis_opt, |
1041 | 15.7M | 0.0, // trellis_opt_thresh |
1042 | 15.7M | NULL, // &sse_calc_done |
1043 | 15.7M | NULL, // &sse |
1044 | 15.7M | ctx.ta[plane], |
1045 | 15.7M | ctx.tl[plane], |
1046 | 15.7M | &xd->mi[0]->skip }; |
1047 | 15.7M | #endif |
1048 | | |
1049 | 15.7M | if (enable_trellis_opt && x->optimize && |
1050 | 15.7M | (!x->skip_recode || !x->skip_optimize)) { |
1051 | 0 | const struct macroblockd_plane *const pd = &xd->plane[plane]; |
1052 | 0 | const TX_SIZE tx_size = |
1053 | 0 | plane ? get_uv_tx_size(xd->mi[0], pd) : xd->mi[0]->tx_size; |
1054 | 0 | vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]); |
1055 | 15.7M | } else { |
1056 | 15.7M | arg.enable_trellis_opt = 0; |
1057 | 15.7M | } |
1058 | | |
1059 | 15.7M | vp9_foreach_transformed_block_in_plane(xd, bsize, plane, |
1060 | 15.7M | vp9_encode_block_intra, &arg); |
1061 | 15.7M | } |