/src/libvpx/vp9/encoder/vp9_aq_variance.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2013 The WebM project authors. All Rights Reserved. |
3 | | * |
4 | | * Use of this source code is governed by a BSD-style license |
5 | | * that can be found in the LICENSE file in the root of the source |
6 | | * tree. An additional intellectual property rights grant can be found |
7 | | * in the file PATENTS. All contributing project authors may |
8 | | * be found in the AUTHORS file in the root of the source tree. |
9 | | */ |
10 | | |
11 | | #include <math.h> |
12 | | |
13 | | #include "vpx_ports/mem.h" |
14 | | #include "vpx_ports/system_state.h" |
15 | | |
16 | | #include "vp9/encoder/vp9_aq_variance.h" |
17 | | |
18 | | #include "vp9/common/vp9_seg_common.h" |
19 | | |
20 | | #include "vp9/encoder/vp9_ratectrl.h" |
21 | | #include "vp9/encoder/vp9_rd.h" |
22 | | #include "vp9/encoder/vp9_encodeframe.h" |
23 | | #include "vp9/encoder/vp9_segmentation.h" |
24 | | |
25 | 1.81M | #define ENERGY_MIN (-4) |
26 | 1.81M | #define ENERGY_MAX (1) |
27 | | #define ENERGY_SPAN (ENERGY_MAX - ENERGY_MIN + 1) |
28 | | #define ENERGY_IN_BOUNDS(energy) \ |
29 | 0 | assert((energy) >= ENERGY_MIN && (energy) <= ENERGY_MAX) |
30 | | |
31 | | static const double rate_ratio[MAX_SEGMENTS] = { 2.5, 2.0, 1.5, 1.0, |
32 | | 0.75, 1.0, 1.0, 1.0 }; |
33 | | static const int segment_id[ENERGY_SPAN] = { 0, 1, 1, 2, 3, 4 }; |
34 | | |
35 | 0 | #define SEGMENT_ID(i) segment_id[(i)-ENERGY_MIN] |
36 | | |
37 | | DECLARE_ALIGNED(16, static const uint8_t, vp9_64_zeros[64]) = { 0 }; |
38 | | #if CONFIG_VP9_HIGHBITDEPTH |
39 | | DECLARE_ALIGNED(16, static const uint16_t, vp9_highbd_64_zeros[64]) = { 0 }; |
40 | | #endif |
41 | | |
42 | 0 | unsigned int vp9_vaq_segment_id(int energy) { |
43 | 0 | ENERGY_IN_BOUNDS(energy); |
44 | 0 | return SEGMENT_ID(energy); |
45 | 0 | } |
46 | | |
47 | 0 | void vp9_vaq_frame_setup(VP9_COMP *cpi) { |
48 | 0 | VP9_COMMON *cm = &cpi->common; |
49 | 0 | struct segmentation *seg = &cm->seg; |
50 | 0 | int i; |
51 | |
|
52 | 0 | if (frame_is_intra_only(cm) || cm->error_resilient_mode || |
53 | 0 | cpi->refresh_alt_ref_frame || cpi->force_update_segmentation || |
54 | 0 | (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) { |
55 | 0 | vp9_enable_segmentation(seg); |
56 | 0 | vp9_clearall_segfeatures(seg); |
57 | |
|
58 | 0 | seg->abs_delta = SEGMENT_DELTADATA; |
59 | |
|
60 | 0 | vpx_clear_system_state(); |
61 | |
|
62 | 0 | for (i = 0; i < MAX_SEGMENTS; ++i) { |
63 | 0 | int qindex_delta = |
64 | 0 | vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, cm->base_qindex, |
65 | 0 | rate_ratio[i], cm->bit_depth); |
66 | | |
67 | | // We don't allow qindex 0 in a segment if the base value is not 0. |
68 | | // Q index 0 (lossless) implies 4x4 encoding only and in AQ mode a segment |
69 | | // Q delta is sometimes applied without going back around the rd loop. |
70 | | // This could lead to an illegal combination of partition size and q. |
71 | 0 | if ((cm->base_qindex != 0) && ((cm->base_qindex + qindex_delta) == 0)) { |
72 | 0 | qindex_delta = -cm->base_qindex + 1; |
73 | 0 | } |
74 | | |
75 | | // No need to enable SEG_LVL_ALT_Q for this segment. |
76 | 0 | if (rate_ratio[i] == 1.0) { |
77 | 0 | continue; |
78 | 0 | } |
79 | | |
80 | 0 | vp9_set_segdata(seg, i, SEG_LVL_ALT_Q, qindex_delta); |
81 | 0 | vp9_enable_segfeature(seg, i, SEG_LVL_ALT_Q); |
82 | 0 | } |
83 | 0 | } |
84 | 0 | } |
85 | | |
/* TODO(agrange, paulwilkins): block_variance() calls the unoptimized versions
 * of variance() and highbd_8_variance(). It should not.
 */
/* Accumulates the sum and the sum of squares of the per-pixel differences
 * between two 8-bit blocks.
 *
 * a, a_stride: first block and the distance in bytes between its rows.
 * b, b_stride: second block and the distance in bytes between its rows.
 * w, h:        block width and height in pixels; nothing is accumulated
 *              when either is <= 0.
 * sse:         receives the sum of squared differences (a - b)^2.
 * sum:         receives the sum of differences (a - b).
 */
static void aq_variance(const uint8_t *a, int a_stride, const uint8_t *b,
                        int b_stride, int w, int h, unsigned int *sse,
                        int *sum) {
  int row;

  *sum = 0;
  *sse = 0;

  for (row = 0; row < h; ++row, a += a_stride, b += b_stride) {
    int col;
    for (col = 0; col < w; ++col) {
      const int delta = (int)a[col] - (int)b[col];
      *sum += delta;
      *sse += (unsigned int)(delta * delta);
    }
  }
}
108 | | |
109 | | #if CONFIG_VP9_HIGHBITDEPTH |
/* High bit-depth counterpart of aq_variance(): accumulates the sum and the
 * sum of squares of the per-sample differences between two 16-bit blocks,
 * using 64-bit accumulators.
 *
 * a8, b8:             block pointers in the byte-pointer form expected by
 *                     CONVERT_TO_SHORTPTR().
 * a_stride, b_stride: distance between rows, in 16-bit samples.
 * w, h:               block width and height in samples.
 * sse:                receives the sum of squared differences.
 * sum:                receives the sum of differences.
 */
static void aq_highbd_variance64(const uint8_t *a8, int a_stride,
                                 const uint8_t *b8, int b_stride, int w, int h,
                                 uint64_t *sse, int64_t *sum) {
  int i, j;

  const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
  const uint16_t *b = CONVERT_TO_SHORTPTR(b8);
  *sum = 0;
  *sse = 0;

  for (i = 0; i < h; i++) {
    for (j = 0; j < w; j++) {
      const int diff = a[j] - b[j];
      *sum += diff;
      // Widen before squaring: with 16-bit samples |diff| can reach 65535,
      // and 65535 * 65535 does not fit in an int.
      *sse += (uint64_t)((int64_t)diff * diff);
    }
    a += a_stride;
    b += b_stride;
  }
}
130 | | |
131 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
132 | | |
133 | | static unsigned int block_variance(VP9_COMP *cpi, MACROBLOCK *x, |
134 | 10.4M | BLOCK_SIZE bs) { |
135 | 10.4M | MACROBLOCKD *xd = &x->e_mbd; |
136 | 10.4M | unsigned int var, sse; |
137 | 10.4M | int right_overflow = |
138 | 10.4M | (xd->mb_to_right_edge < 0) ? ((-xd->mb_to_right_edge) >> 3) : 0; |
139 | 10.4M | int bottom_overflow = |
140 | 10.4M | (xd->mb_to_bottom_edge < 0) ? ((-xd->mb_to_bottom_edge) >> 3) : 0; |
141 | | |
142 | 10.4M | if (right_overflow || bottom_overflow) { |
143 | 196k | const int bw = 8 * num_8x8_blocks_wide_lookup[bs] - right_overflow; |
144 | 196k | const int bh = 8 * num_8x8_blocks_high_lookup[bs] - bottom_overflow; |
145 | 196k | int avg; |
146 | 196k | #if CONFIG_VP9_HIGHBITDEPTH |
147 | 196k | if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
148 | 0 | uint64_t sse64 = 0; |
149 | 0 | int64_t sum64 = 0; |
150 | 0 | aq_highbd_variance64(x->plane[0].src.buf, x->plane[0].src.stride, |
151 | 0 | CONVERT_TO_BYTEPTR(vp9_highbd_64_zeros), 0, bw, bh, |
152 | 0 | &sse64, &sum64); |
153 | 0 | sse = (unsigned int)(sse64 >> (2 * (xd->bd - 8))); |
154 | 0 | avg = (int)(sum64 >> (xd->bd - 8)); |
155 | 196k | } else { |
156 | 196k | aq_variance(x->plane[0].src.buf, x->plane[0].src.stride, vp9_64_zeros, 0, |
157 | 196k | bw, bh, &sse, &avg); |
158 | 196k | } |
159 | | #else |
160 | | aq_variance(x->plane[0].src.buf, x->plane[0].src.stride, vp9_64_zeros, 0, |
161 | | bw, bh, &sse, &avg); |
162 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
163 | 196k | var = sse - (unsigned int)(((int64_t)avg * avg) / (bw * bh)); |
164 | 196k | return (unsigned int)(((uint64_t)256 * var) / (bw * bh)); |
165 | 10.2M | } else { |
166 | 10.2M | #if CONFIG_VP9_HIGHBITDEPTH |
167 | 10.2M | if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
168 | 0 | var = |
169 | 0 | cpi->fn_ptr[bs].vf(x->plane[0].src.buf, x->plane[0].src.stride, |
170 | 0 | CONVERT_TO_BYTEPTR(vp9_highbd_64_zeros), 0, &sse); |
171 | 10.2M | } else { |
172 | 10.2M | var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, x->plane[0].src.stride, |
173 | 10.2M | vp9_64_zeros, 0, &sse); |
174 | 10.2M | } |
175 | | #else |
176 | | var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, x->plane[0].src.stride, |
177 | | vp9_64_zeros, 0, &sse); |
178 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
179 | 10.2M | return (unsigned int)(((uint64_t)256 * var) >> num_pels_log2_lookup[bs]); |
180 | 10.2M | } |
181 | 10.4M | } |
182 | | |
183 | 2.61M | double vp9_log_block_var(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) { |
184 | 2.61M | unsigned int var = block_variance(cpi, x, bs); |
185 | 2.61M | vpx_clear_system_state(); |
186 | 2.61M | return log(var + 1.0); |
187 | 2.61M | } |
188 | | |
189 | 3.63M | #define DEFAULT_E_MIDPOINT 10.0 |
190 | 1.81M | static int scale_block_energy(VP9_COMP *cpi, unsigned int block_var) { |
191 | 1.81M | double energy; |
192 | 1.81M | double energy_midpoint; |
193 | 1.81M | energy_midpoint = |
194 | 1.81M | (cpi->oxcf.pass == 2) ? cpi->twopass.mb_av_energy : DEFAULT_E_MIDPOINT; |
195 | 1.81M | energy = log(block_var + 1.0) - energy_midpoint; |
196 | 1.81M | return clamp((int)round(energy), ENERGY_MIN, ENERGY_MAX); |
197 | 1.81M | } |
198 | | #undef DEFAULT_E_MIDPOINT |
199 | | |
200 | | // Get the range of sub block energy values; |
201 | | void vp9_get_sub_block_energy(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, |
202 | | int mi_col, BLOCK_SIZE bsize, int *min_e, |
203 | 993k | int *max_e) { |
204 | 993k | VP9_COMMON *const cm = &cpi->common; |
205 | 993k | const int bw = num_8x8_blocks_wide_lookup[bsize]; |
206 | 993k | const int bh = num_8x8_blocks_high_lookup[bsize]; |
207 | 993k | const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); |
208 | 993k | const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); |
209 | 993k | int x, y; |
210 | | |
211 | 993k | if (xmis < bw || ymis < bh) { |
212 | 171k | vp9_setup_src_planes(mb, cpi->Source, mi_row, mi_col); |
213 | 171k | *min_e = vp9_block_energy(cpi, mb, bsize); |
214 | 171k | *max_e = *min_e; |
215 | 822k | } else { |
216 | 822k | unsigned int var; |
217 | | // Because scale_block_energy is non-decreasing, we can find the min/max |
218 | | // block variance and scale afterwards. This avoids a costly scaling at |
219 | | // every iteration. |
220 | 822k | unsigned int min_var = UINT_MAX; |
221 | 822k | unsigned int max_var = 0; |
222 | | |
223 | 3.03M | for (y = 0; y < ymis; ++y) { |
224 | 9.92M | for (x = 0; x < xmis; ++x) { |
225 | 7.70M | vp9_setup_src_planes(mb, cpi->Source, mi_row + y, mi_col + x); |
226 | 7.70M | vpx_clear_system_state(); |
227 | 7.70M | var = block_variance(cpi, mb, BLOCK_8X8); |
228 | 7.70M | vpx_clear_system_state(); |
229 | 7.70M | min_var = VPXMIN(min_var, var); |
230 | 7.70M | max_var = VPXMAX(max_var, var); |
231 | 7.70M | } |
232 | 2.21M | } |
233 | 822k | *min_e = scale_block_energy(cpi, min_var); |
234 | 822k | *max_e = scale_block_energy(cpi, max_var); |
235 | 822k | } |
236 | | |
237 | | // Re-instate source pointers back to what they should have been on entry. |
238 | 993k | vp9_setup_src_planes(mb, cpi->Source, mi_row, mi_col); |
239 | 993k | } |
240 | | |
241 | 171k | int vp9_block_energy(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) { |
242 | 171k | unsigned int var; |
243 | 171k | vpx_clear_system_state(); |
244 | 171k | var = block_variance(cpi, x, bs); |
245 | 171k | vpx_clear_system_state(); |
246 | 171k | return scale_block_energy(cpi, var); |
247 | 171k | } |