/src/libvpx/vp9/decoder/vp9_detokenize.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 | | * |
4 | | * Use of this source code is governed by a BSD-style license |
5 | | * that can be found in the LICENSE file in the root of the source |
6 | | * tree. An additional intellectual property rights grant can be found |
7 | | * in the file PATENTS. All contributing project authors may |
8 | | * be found in the AUTHORS file in the root of the source tree. |
9 | | */ |
10 | | |
11 | | #include "vpx_mem/vpx_mem.h" |
12 | | #include "vpx_ports/mem.h" |
13 | | |
14 | | #include "vp9/common/vp9_blockd.h" |
15 | | #include "vp9/common/vp9_common.h" |
16 | | #include "vp9/common/vp9_entropy.h" |
17 | | #if CONFIG_COEFFICIENT_RANGE_CHECKING |
18 | | #include "vp9/common/vp9_idct.h" |
19 | | #endif |
20 | | |
21 | | #include "vp9/decoder/vp9_detokenize.h" |
22 | | |
// Indices into one row of coefficient probabilities (prob[...]) as used by
// decode_coefs(): node 0 is the end-of-block test, node 1 the zero-token
// test, and node 2 the one-token test.
#define EOB_CONTEXT_NODE 0
#define ZERO_CONTEXT_NODE 1
#define ONE_CONTEXT_NODE 2

// Increments the counter for |token| at the current [band][ctx] slot, but
// only when |counts| is non-NULL. This macro expands in place and therefore
// requires the identifiers counts, coef_counts, band and ctx to be in scope
// wherever it is used.
#define INCREMENT_COUNT(token)                   \
  do {                                           \
    if (counts) ++coef_counts[band][ctx][token]; \
  } while (0)
31 | | |
32 | | static INLINE int read_bool(vpx_reader *r, int prob, BD_VALUE *value, |
33 | 354M | int *count, unsigned int *range) { |
34 | 354M | const unsigned int split = (*range * prob + (256 - prob)) >> CHAR_BIT; |
35 | 354M | const BD_VALUE bigsplit = (BD_VALUE)split << (BD_VALUE_SIZE - CHAR_BIT); |
36 | | #if CONFIG_BITSTREAM_DEBUG |
37 | | const int queue_r = bitstream_queue_get_read(); |
38 | | const int frame_idx = bitstream_queue_get_frame_read(); |
39 | | int ref_result, ref_prob; |
40 | | bitstream_queue_pop(&ref_result, &ref_prob); |
41 | | if (prob != ref_prob) { |
42 | | fprintf(stderr, |
43 | | "\n *** [bit] prob error, frame_idx_r %d prob %d ref_prob %d " |
44 | | "queue_r %d\n", |
45 | | frame_idx, prob, ref_prob, queue_r); |
46 | | |
47 | | assert(0); |
48 | | } |
49 | | #endif |
50 | | |
51 | 354M | if (*count < 0) { |
52 | 4.32M | r->value = *value; |
53 | 4.32M | r->count = *count; |
54 | 4.32M | vpx_reader_fill(r); |
55 | 4.32M | *value = r->value; |
56 | 4.32M | *count = r->count; |
57 | 4.32M | } |
58 | | |
59 | 354M | if (*value >= bigsplit) { |
60 | 142M | *range = *range - split; |
61 | 142M | *value = *value - bigsplit; |
62 | 142M | { |
63 | 142M | const int shift = vpx_norm[*range]; |
64 | 142M | *range <<= shift; |
65 | 142M | *value <<= shift; |
66 | 142M | *count -= shift; |
67 | 142M | } |
68 | | #if CONFIG_BITSTREAM_DEBUG |
69 | | { |
70 | | const int bit = 1; |
71 | | if (bit != ref_result) { |
72 | | fprintf( |
73 | | stderr, |
74 | | "\n *** [bit] result error, frame_idx_r %d bit %d ref_result %d " |
75 | | "queue_r %d\n", |
76 | | frame_idx, bit, ref_result, queue_r); |
77 | | |
78 | | assert(0); |
79 | | } |
80 | | } |
81 | | #endif |
82 | 142M | return 1; |
83 | 142M | } |
84 | 211M | *range = split; |
85 | 211M | { |
86 | 211M | const int shift = vpx_norm[*range]; |
87 | 211M | *range <<= shift; |
88 | 211M | *value <<= shift; |
89 | 211M | *count -= shift; |
90 | 211M | } |
91 | | #if CONFIG_BITSTREAM_DEBUG |
92 | | { |
93 | | const int bit = 0; |
94 | | if (bit != ref_result) { |
95 | | fprintf(stderr, |
96 | | "\n *** [bit] result error, frame_idx_r %d bit %d ref_result %d " |
97 | | "queue_r %d\n", |
98 | | frame_idx, bit, ref_result, queue_r); |
99 | | |
100 | | assert(0); |
101 | | } |
102 | | } |
103 | | #endif |
104 | 211M | return 0; |
105 | 354M | } |
106 | | |
107 | | static INLINE int read_coeff(vpx_reader *r, const vpx_prob *probs, int n, |
108 | 3.43M | BD_VALUE *value, int *count, unsigned int *range) { |
109 | 3.43M | int i, val = 0; |
110 | 15.8M | for (i = 0; i < n; ++i) |
111 | 12.3M | val = (val << 1) | read_bool(r, probs[i], value, count, range); |
112 | 3.43M | return val; |
113 | 3.43M | } |
114 | | |
// Decodes the coefficient tokens of one transform block from |r| and writes
// the dequantized, signed results into |dqcoeff| at the positions given by
// |scan|.
//
//   xd       supplies the frame context (probabilities), optional counters,
//            the current mode info and (high bit depth builds) the bit depth.
//   type     plane type used to select the probability/count tables.
//   tx_size  transform size; selects the tables, the coefficient limit
//            (16 << (tx_size << 1)) and whether the result is halved.
//   dq       dq[0] scales the first coefficient position, dq[1] every later
//            position.
//   ctx      initial context index; re-derived after every decoded position
//            via get_coef_context().
//   nb       neighbor table handed to get_coef_context().
//
// Returns the scan index at which decoding stopped: either where the
// end-of-block test failed, or max_eob if the block ran to its last position.
// Positions that decode as zero are not written to |dqcoeff|.
static int decode_coefs(const MACROBLOCKD *xd, PLANE_TYPE type,
                        tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq,
                        int ctx, const int16_t *scan, const int16_t *nb,
                        vpx_reader *r) {
  FRAME_COUNTS *counts = xd->counts;
  const int max_eob = 16 << (tx_size << 1);
  const FRAME_CONTEXT *const fc = xd->fc;
  const int ref = is_inter_block(xd->mi[0]);
  int band, c = 0;
  const vpx_prob(*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
      fc->coef_probs[tx_size][type][ref];
  const vpx_prob *prob;
  // Both count pointers are assigned only when |counts| is non-NULL; every
  // use below is guarded by the same test.
  unsigned int(*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1];
  unsigned int(*eob_branch_count)[COEFF_CONTEXTS];
  // Per-position token class (0..5) recorded as tokens are decoded and fed
  // back into get_coef_context().
  uint8_t token_cache[32 * 32];
  const uint8_t *band_translate = get_band_translate(tx_size);
  // 1 for TX_32X32 (results are shifted right by one), 0 otherwise.
  const int dq_shift = (tx_size == TX_32X32);
  int v;
  int16_t dqv = dq[0];
  // Probability table and bit count for the largest token category; both
  // depend on bit depth when high bit depth support is compiled in.
  const uint8_t *const cat6_prob =
#if CONFIG_VP9_HIGHBITDEPTH
      (xd->bd == VPX_BITS_12)   ? vp9_cat6_prob_high12
      : (xd->bd == VPX_BITS_10) ? vp9_cat6_prob_high12 + 2
                                :
#endif  // CONFIG_VP9_HIGHBITDEPTH
                                vp9_cat6_prob;
  const int cat6_bits =
#if CONFIG_VP9_HIGHBITDEPTH
      (xd->bd == VPX_BITS_12)   ? 18
      : (xd->bd == VPX_BITS_10) ? 16
                                :
#endif  // CONFIG_VP9_HIGHBITDEPTH
                                14;
  // Keep value, range, and count as locals. The compiler produces better
  // results with the locals than using r directly.
  BD_VALUE value = r->value;
  unsigned int range = r->range;
  int count = r->count;

  if (counts) {
    coef_counts = counts->coef[tx_size][type][ref];
    eob_branch_count = counts->eob_branch[tx_size][type][ref];
  }

  while (c < max_eob) {
    int val = -1;
    band = *band_translate++;
    prob = coef_probs[band][ctx];
    if (counts) ++eob_branch_count[band][ctx];
    // A 0 here means end of block: nothing further is coded for this block.
    if (!read_bool(r, prob[EOB_CONTEXT_NODE], &value, &count, &range)) {
      INCREMENT_COUNT(EOB_MODEL_TOKEN);
      break;
    }

    // Consume a run of zero tokens. No end-of-block test is made between
    // consecutive zeros; each zero advances the position and refreshes the
    // context, band and probability row.
    while (!read_bool(r, prob[ZERO_CONTEXT_NODE], &value, &count, &range)) {
      INCREMENT_COUNT(ZERO_TOKEN);
      dqv = dq[1];
      token_cache[scan[c]] = 0;
      ++c;
      if (c >= max_eob) {
        // Write the cached reader state back before leaving.
        r->value = value;
        r->range = range;
        r->count = count;
        return c;  // zero tokens at the end (no eob token)
      }
      ctx = get_coef_context(nb, token_cache, c);
      band = *band_translate++;
      prob = coef_probs[band][ctx];
    }

    if (read_bool(r, prob[ONE_CONTEXT_NODE], &value, &count, &range)) {
      // Magnitude is at least 2: the remaining tree is walked with the row
      // of vp9_pareto8_full selected by the pivot probability. Every such
      // token is counted as TWO_TOKEN.
      const vpx_prob *p = vp9_pareto8_full[prob[PIVOT_NODE] - 1];
      INCREMENT_COUNT(TWO_TOKEN);
      if (read_bool(r, p[0], &value, &count, &range)) {
        // Category tokens: a base value plus extra bits from read_coeff().
        if (read_bool(r, p[3], &value, &count, &range)) {
          token_cache[scan[c]] = 5;
          if (read_bool(r, p[5], &value, &count, &range)) {
            if (read_bool(r, p[7], &value, &count, &range)) {
              val = CAT6_MIN_VAL +
                    read_coeff(r, cat6_prob, cat6_bits, &value, &count, &range);
            } else {
              val = CAT5_MIN_VAL +
                    read_coeff(r, vp9_cat5_prob, 5, &value, &count, &range);
            }
          } else if (read_bool(r, p[6], &value, &count, &range)) {
            val = CAT4_MIN_VAL +
                  read_coeff(r, vp9_cat4_prob, 4, &value, &count, &range);
          } else {
            val = CAT3_MIN_VAL +
                  read_coeff(r, vp9_cat3_prob, 3, &value, &count, &range);
          }
        } else {
          token_cache[scan[c]] = 4;
          if (read_bool(r, p[4], &value, &count, &range)) {
            val = CAT2_MIN_VAL +
                  read_coeff(r, vp9_cat2_prob, 2, &value, &count, &range);
          } else {
            val = CAT1_MIN_VAL +
                  read_coeff(r, vp9_cat1_prob, 1, &value, &count, &range);
          }
        }
#if CONFIG_VP9_HIGHBITDEPTH
        // val may use 18-bits
        v = (int)(((int64_t)val * dqv) >> dq_shift);
#else
        v = (val * dqv) >> dq_shift;
#endif
      } else {
        if (read_bool(r, p[1], &value, &count, &range)) {
          // Magnitude 3 or 4, selected by one more boolean.
          token_cache[scan[c]] = 3;
          v = ((3 + read_bool(r, p[2], &value, &count, &range)) * dqv) >>
              dq_shift;
        } else {
          // Magnitude 2.
          token_cache[scan[c]] = 2;
          v = (2 * dqv) >> dq_shift;
        }
      }
    } else {
      // Magnitude 1.
      INCREMENT_COUNT(ONE_TOKEN);
      token_cache[scan[c]] = 1;
      v = dqv >> dq_shift;
    }
    // The sign is read with a fixed probability of 128; a 1 negates v.
#if CONFIG_COEFFICIENT_RANGE_CHECKING
#if CONFIG_VP9_HIGHBITDEPTH
    dqcoeff[scan[c]] = highbd_check_range(
        read_bool(r, 128, &value, &count, &range) ? -v : v, xd->bd);
#else
    dqcoeff[scan[c]] =
        check_range(read_bool(r, 128, &value, &count, &range) ? -v : v);
#endif  // CONFIG_VP9_HIGHBITDEPTH
#else
    if (read_bool(r, 128, &value, &count, &range)) {
      dqcoeff[scan[c]] = (tran_low_t)-v;
    } else {
      dqcoeff[scan[c]] = (tran_low_t)v;
    }
#endif  // CONFIG_COEFFICIENT_RANGE_CHECKING
    ++c;
    ctx = get_coef_context(nb, token_cache, c);
    // Only the first position uses dq[0]; all later ones use dq[1].
    dqv = dq[1];
  }

  // Write the cached reader state back for the next caller.
  r->value = value;
  r->range = range;
  r->count = count;
  return c;
}
262 | | |
263 | | static void get_ctx_shift(MACROBLOCKD *xd, int *ctx_shift_a, int *ctx_shift_l, |
264 | 11.1M | int x, int y, unsigned int tx_size_in_blocks) { |
265 | 11.1M | if (xd->max_blocks_wide) { |
266 | 1.64M | if (tx_size_in_blocks + x > xd->max_blocks_wide) |
267 | 769k | *ctx_shift_a = (tx_size_in_blocks - (xd->max_blocks_wide - x)) * 8; |
268 | 1.64M | } |
269 | 11.1M | if (xd->max_blocks_high) { |
270 | 1.96M | if (tx_size_in_blocks + y > xd->max_blocks_high) |
271 | 816k | *ctx_shift_l = (tx_size_in_blocks - (xd->max_blocks_high - y)) * 8; |
272 | 1.96M | } |
273 | 11.1M | } |
274 | | |
275 | | int vp9_decode_block_tokens(TileWorkerData *twd, int plane, const ScanOrder *sc, |
276 | 72.3M | int x, int y, TX_SIZE tx_size, int seg_id) { |
277 | 72.3M | vpx_reader *r = &twd->bit_reader; |
278 | 72.3M | MACROBLOCKD *xd = &twd->xd; |
279 | 72.3M | struct macroblockd_plane *const pd = &xd->plane[plane]; |
280 | 72.3M | const int16_t *const dequant = pd->seg_dequant[seg_id]; |
281 | 72.3M | int eob; |
282 | 72.3M | ENTROPY_CONTEXT *a = pd->above_context + x; |
283 | 72.3M | ENTROPY_CONTEXT *l = pd->left_context + y; |
284 | 72.3M | int ctx; |
285 | 72.3M | int ctx_shift_a = 0; |
286 | 72.3M | int ctx_shift_l = 0; |
287 | | |
288 | 72.3M | switch (tx_size) { |
289 | 61.2M | case TX_4X4: |
290 | 61.2M | ctx = a[0] != 0; |
291 | 61.2M | ctx += l[0] != 0; |
292 | 61.2M | eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size, |
293 | 61.2M | dequant, ctx, sc->scan, sc->neighbors, r); |
294 | 61.2M | a[0] = l[0] = (eob > 0); |
295 | 61.2M | break; |
296 | 7.67M | case TX_8X8: |
297 | 7.67M | get_ctx_shift(xd, &ctx_shift_a, &ctx_shift_l, x, y, 1 << TX_8X8); |
298 | 7.67M | ctx = !!*(const uint16_t *)a; |
299 | 7.67M | ctx += !!*(const uint16_t *)l; |
300 | 7.67M | eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size, |
301 | 7.67M | dequant, ctx, sc->scan, sc->neighbors, r); |
302 | 7.67M | *(uint16_t *)a = ((eob > 0) * 0x0101) >> ctx_shift_a; |
303 | 7.67M | *(uint16_t *)l = ((eob > 0) * 0x0101) >> ctx_shift_l; |
304 | 7.67M | break; |
305 | 2.00M | case TX_16X16: |
306 | 2.00M | get_ctx_shift(xd, &ctx_shift_a, &ctx_shift_l, x, y, 1 << TX_16X16); |
307 | 2.00M | ctx = !!*(const uint32_t *)a; |
308 | 2.00M | ctx += !!*(const uint32_t *)l; |
309 | 2.00M | eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size, |
310 | 2.00M | dequant, ctx, sc->scan, sc->neighbors, r); |
311 | 2.00M | *(uint32_t *)a = ((eob > 0) * 0x01010101) >> ctx_shift_a; |
312 | 2.00M | *(uint32_t *)l = ((eob > 0) * 0x01010101) >> ctx_shift_l; |
313 | 2.00M | break; |
314 | 1.43M | case TX_32X32: |
315 | 1.43M | get_ctx_shift(xd, &ctx_shift_a, &ctx_shift_l, x, y, 1 << TX_32X32); |
316 | | // NOTE: casting to uint64_t here is safe because the default memory |
317 | | // alignment is at least 8 bytes and the TX_32X32 is aligned on 8 byte |
318 | | // boundaries. |
319 | 1.43M | ctx = !!*(const uint64_t *)a; |
320 | 1.43M | ctx += !!*(const uint64_t *)l; |
321 | 1.43M | eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size, |
322 | 1.43M | dequant, ctx, sc->scan, sc->neighbors, r); |
323 | 1.43M | *(uint64_t *)a = ((eob > 0) * 0x0101010101010101ULL) >> ctx_shift_a; |
324 | 1.43M | *(uint64_t *)l = ((eob > 0) * 0x0101010101010101ULL) >> ctx_shift_l; |
325 | 1.43M | break; |
326 | 0 | default: |
327 | 0 | assert(0 && "Invalid transform size."); |
328 | 0 | eob = 0; |
329 | 0 | break; |
330 | 72.3M | } |
331 | | |
332 | 72.3M | return eob; |
333 | 72.3M | } |