/src/libvpx/vpx_dsp/variance.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 | | * |
4 | | * Use of this source code is governed by a BSD-style license |
5 | | * that can be found in the LICENSE file in the root of the source |
6 | | * tree. An additional intellectual property rights grant can be found |
7 | | * in the file PATENTS. All contributing project authors may |
8 | | * be found in the AUTHORS file in the root of the source tree. |
9 | | */ |
10 | | #include <assert.h> |
11 | | #include <limits.h> |
12 | | #include <stdlib.h> |
13 | | |
14 | | #include "./vpx_config.h" |
15 | | #include "./vpx_dsp_rtcd.h" |
16 | | |
17 | | #include "vpx_ports/compiler_attributes.h" |
18 | | #include "vpx_ports/mem.h" |
19 | | #include "vpx/vpx_integer.h" |
20 | | |
21 | | #include "vpx_dsp/variance.h" |
22 | | |
// 2-tap bilinear filter coefficients for each of the 8 eighth-pel subpixel
// positions. Each pair of taps sums to 128 (the filter weight, i.e.
// 1 << FILTER_BITS), so filtering preserves overall signal level.
static const uint8_t bilinear_filters[8][2] = {
  { 128, 0 }, { 112, 16 }, { 96, 32 }, { 80, 48 },
  { 64, 64 }, { 48, 80 }, { 32, 96 }, { 16, 112 },
};
27 | | |
// Returns the sum of squared differences between a 4x4 source block and a
// 4x4 reference block (no separate *sse out-parameter in this variant).
uint32_t vpx_get4x4sse_cs_c(const uint8_t *src_ptr, int src_stride,
                            const uint8_t *ref_ptr, int ref_stride) {
  int sse = 0;
  int row;

  for (row = 0; row < 4; ++row) {
    // Fixed 4-wide row: accumulate the four squared differences directly.
    const int d0 = src_ptr[0] - ref_ptr[0];
    const int d1 = src_ptr[1] - ref_ptr[1];
    const int d2 = src_ptr[2] - ref_ptr[2];
    const int d3 = src_ptr[3] - ref_ptr[3];
    sse += d0 * d0 + d1 * d1 + d2 * d2 + d3 * d3;
    src_ptr += src_stride;
    ref_ptr += ref_stride;
  }

  return sse;
}
45 | | |
// Returns the sum of squares over one macroblock's worth (256 contiguous
// entries) of 16-bit values.
uint32_t vpx_get_mb_ss_c(const int16_t *src_ptr) {
  unsigned int sum = 0;
  unsigned int i = 0;

  while (i < 256) {
    const int v = src_ptr[i];
    sum += v * v;
    ++i;
  }

  return sum;
}
55 | | |
// Computes the sum of differences (*sum) and the sum of squared differences
// (*sse) between a w x h source block and reference block.
static void variance(const uint8_t *src_ptr, int src_stride,
                     const uint8_t *ref_ptr, int ref_stride, int w, int h,
                     uint32_t *sse, int *sum) {
  int row, col;
  int sum_acc = 0;
  uint32_t sse_acc = 0;

  for (row = 0; row < h; ++row) {
    for (col = 0; col < w; ++col) {
      const int diff = src_ptr[col] - ref_ptr[col];
      sum_acc += diff;
      sse_acc += diff * diff;
    }
    src_ptr += src_stride;
    ref_ptr += ref_stride;
  }

  *sum = sum_acc;
  *sse = sse_acc;
}
75 | | |
76 | | // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal |
77 | | // or vertical direction to produce the filtered output block. Used to implement |
78 | | // the first-pass of 2-D separable filter. |
79 | | // |
80 | | // Produces int16_t output to retain precision for the next pass. Two filter |
81 | | // taps should sum to FILTER_WEIGHT. pixel_step defines whether the filter is |
82 | | // applied horizontally (pixel_step = 1) or vertically (pixel_step = stride). |
83 | | // It defines the offset required to move from one input to the next. |
84 | | static void var_filter_block2d_bil_first_pass( |
85 | | const uint8_t *src_ptr, uint16_t *ref_ptr, unsigned int src_pixels_per_line, |
86 | | int pixel_step, unsigned int output_height, unsigned int output_width, |
87 | 0 | const uint8_t *filter) { |
88 | 0 | unsigned int i, j; |
89 | |
|
90 | 0 | for (i = 0; i < output_height; ++i) { |
91 | 0 | for (j = 0; j < output_width; ++j) { |
92 | 0 | ref_ptr[j] = ROUND_POWER_OF_TWO( |
93 | 0 | (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1], |
94 | 0 | FILTER_BITS); |
95 | |
|
96 | 0 | ++src_ptr; |
97 | 0 | } |
98 | |
|
99 | 0 | src_ptr += src_pixels_per_line - output_width; |
100 | 0 | ref_ptr += output_width; |
101 | 0 | } |
102 | 0 | } |
103 | | |
104 | | // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal |
105 | | // or vertical direction to produce the filtered output block. Used to implement |
106 | | // the second-pass of 2-D separable filter. |
107 | | // |
108 | | // Requires 16-bit input as produced by filter_block2d_bil_first_pass. Two |
109 | | // filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the |
110 | | // filter is applied horizontally (pixel_step = 1) or vertically |
111 | | // (pixel_step = stride). It defines the offset required to move from one input |
112 | | // to the next. Output is 8-bit. |
113 | | static void var_filter_block2d_bil_second_pass( |
114 | | const uint16_t *src_ptr, uint8_t *ref_ptr, unsigned int src_pixels_per_line, |
115 | | unsigned int pixel_step, unsigned int output_height, |
116 | 0 | unsigned int output_width, const uint8_t *filter) { |
117 | 0 | unsigned int i, j; |
118 | |
|
119 | 0 | for (i = 0; i < output_height; ++i) { |
120 | 0 | for (j = 0; j < output_width; ++j) { |
121 | 0 | ref_ptr[j] = ROUND_POWER_OF_TWO( |
122 | 0 | (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1], |
123 | 0 | FILTER_BITS); |
124 | 0 | ++src_ptr; |
125 | 0 | } |
126 | |
|
127 | 0 | src_ptr += src_pixels_per_line - output_width; |
128 | 0 | ref_ptr += output_width; |
129 | 0 | } |
130 | 0 | } |
131 | | |
// Defines vpx_varianceWxH_c(): stores the raw sum of squared errors through
// *sse and returns the variance, sse - sum^2 / (W * H). The (int64_t) cast
// keeps sum * sum exact for the largest instantiated block (64x64).
#define VAR(W, H)                                                            \
  uint32_t vpx_variance##W##x##H##_c(const uint8_t *src_ptr, int src_stride, \
                                     const uint8_t *ref_ptr, int ref_stride, \
                                     uint32_t *sse) {                        \
    int sum;                                                                 \
    variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, &sum);     \
    return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H));                \
  }
140 | | |
// Defines vpx_sub_pixel_varianceWxH_c(): interpolates the source block at
// the (x_offset, y_offset) eighth-pel position with a separable 2-tap
// bilinear filter (horizontal pass into 16-bit fdata3, vertical pass into
// 8-bit temp2), then computes the variance against the reference block.
// The first pass produces H + 1 rows so the vertical pass has both taps.
#define SUBPIX_VAR(W, H)                                                     \
  uint32_t vpx_sub_pixel_variance##W##x##H##_c(                              \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,    \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) {               \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint8_t temp2[H * W];                                                    \
                                                                             \
    var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_stride, 1, H + 1, \
                                      W, bilinear_filters[x_offset]);        \
    var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,            \
                                       bilinear_filters[y_offset]);          \
                                                                             \
    return vpx_variance##W##x##H##_c(temp2, W, ref_ptr, ref_stride, sse);    \
  }
155 | | |
// Defines vpx_sub_pixel_avg_varianceWxH_c(): like SUBPIX_VAR, but after the
// bilinear interpolation the result is averaged with second_pred (compound
// prediction) before the variance against the reference is computed.
// temp3 is 32-byte aligned to match the expectations of SIMD variants.
#define SUBPIX_AVG_VAR(W, H)                                                 \
  uint32_t vpx_sub_pixel_avg_variance##W##x##H##_c(                          \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,    \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse,                 \
      const uint8_t *second_pred) {                                          \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint8_t temp2[H * W];                                                    \
    DECLARE_ALIGNED(32, uint8_t, temp3[H * W]);                              \
                                                                             \
    var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_stride, 1, H + 1, \
                                      W, bilinear_filters[x_offset]);        \
    var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,            \
                                       bilinear_filters[y_offset]);          \
                                                                             \
    vpx_comp_avg_pred_c(temp3, second_pred, W, H, temp2, W);                 \
                                                                             \
    return vpx_variance##W##x##H##_c(temp3, W, ref_ptr, ref_stride, sse);    \
  }
174 | | |
175 | | /* Identical to the variance call except it takes an additional parameter, sum, |
176 | | * and returns that value using pass-by-reference instead of returning |
177 | | * sse - sum^2 / w*h |
178 | | */ |
#define GET_VAR(W, H)                                                  \
  void vpx_get##W##x##H##var_c(const uint8_t *src_ptr, int src_stride, \
                               const uint8_t *ref_ptr, int ref_stride, \
                               uint32_t *sse, int *sum) {              \
    /* Thin wrapper: expose both sse and sum without the final         \
       variance arithmetic. */                                         \
    variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, sum); \
  }
185 | | |
186 | | /* Identical to the variance call except it does not calculate the |
187 | | * sse - sum^2 / w*h and returns sse in addition to modifying the passed in |
188 | | * variable. |
189 | | */ |
#define MSE(W, H)                                                         \
  uint32_t vpx_mse##W##x##H##_c(const uint8_t *src_ptr, int src_stride,   \
                                const uint8_t *ref_ptr, int ref_stride,   \
                                uint32_t *sse) {                          \
    int sum;                                                              \
    /* sum is computed but intentionally discarded: MSE is just sse. */   \
    variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, &sum);  \
    return *sse;                                                          \
  }
198 | | |
/* All three forms of the variance are available in the same sizes. */
#define VARIANCES(W, H) \
  VAR(W, H)             \
  SUBPIX_VAR(W, H)      \
  SUBPIX_AVG_VAR(W, H)

// Instantiate the plain, sub-pixel, and sub-pixel-averaging variance
// functions for every block size used by the codec (64x64 down to 4x4).
VARIANCES(64, 64)
VARIANCES(64, 32)
VARIANCES(32, 64)
VARIANCES(32, 32)
VARIANCES(32, 16)
VARIANCES(16, 32)
VARIANCES(16, 16)
VARIANCES(16, 8)
VARIANCES(8, 16)
VARIANCES(8, 8)
VARIANCES(8, 4)
VARIANCES(4, 8)
VARIANCES(4, 4)

// sse/sum accessors and plain MSE only exist for the sizes callers need.
GET_VAR(16, 16)
GET_VAR(8, 8)

MSE(16, 16)
MSE(16, 8)
MSE(8, 16)
MSE(8, 8)
226 | | |
// Writes the pixelwise rounded average of `pred` (stride = width) and `ref`
// (stride = ref_stride) into `comp_pred` (stride = width).
void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
                         int height, const uint8_t *ref, int ref_stride) {
  int row, col;

  for (row = 0; row < height; ++row) {
    for (col = 0; col < width; ++col) {
      // Rounded average: equivalent to ROUND_POWER_OF_TWO(pred + ref, 1).
      comp_pred[col] = (uint8_t)((pred[col] + ref[col] + 1) >> 1);
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}
241 | | |
242 | | #if CONFIG_VP9_HIGHBITDEPTH |
243 | | // Note this function uses unsigned integer math in calculating the sum squared |
244 | | // error to avoid reports of signed integer overflow should a (fuzzing) test |
245 | | // use input values that are outside of the valid range for 12-bit content. The |
246 | | // additional annotation is necessary as an overflow will still be reported |
247 | | // with the integer/unsigned-integer-overflow (not undefined) sanitizer. |
248 | | // This function assumes `abs(value) <= UINT16_MAX`, as the only overflow |
249 | | // that's expected is from casting negative signed integer values to unsigned |
250 | | // and squaring the result. |
// Squares `value` using unsigned arithmetic; see the note above on why this
// is done (defined modular behavior for negative inputs under fuzzing).
static VPX_NO_UNSIGNED_OVERFLOW_CHECK uint32_t square_value(int value) {
  assert(abs(value) <= UINT16_MAX);
  // Casting a negative int to uint32_t wraps modulo 2^32; squaring that
  // still yields the correct value^2 (mod 2^32), which fits for |v|<=65535.
  const uint32_t unsigned_value = (uint32_t)value;
  return unsigned_value * unsigned_value;
}
256 | | |
// High-bitdepth variance core: accumulates the sum of differences and the
// sum of squared differences over a w x h block pair into 64-bit
// accumulators. src8_ptr/ref8_ptr are packed pointers to uint16_t samples
// (unpacked via CONVERT_TO_SHORTPTR).
static void highbd_variance64(const uint8_t *src8_ptr, int src_stride,
                              const uint8_t *ref8_ptr, int ref_stride, int w,
                              int h, uint64_t *sse, int64_t *sum) {
  int i, j;

  uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src8_ptr);
  uint16_t *ref_ptr = CONVERT_TO_SHORTPTR(ref8_ptr);
  *sum = 0;
  *sse = 0;

  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int diff = src_ptr[j] - ref_ptr[j];
      *sum += diff;
      // square_value() keeps the squaring well-defined for out-of-range
      // (fuzzed) inputs; see its comment above.
      *sse += square_value(diff);
    }
    src_ptr += src_stride;
    ref_ptr += ref_stride;
  }
}
277 | | |
// 8-bit-depth wrapper around highbd_variance64: narrows the 64-bit
// accumulators without scaling (values already fit the 8-bit range).
static void highbd_8_variance(const uint8_t *src8_ptr, int src_stride,
                              const uint8_t *ref8_ptr, int ref_stride, int w,
                              int h, uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  int64_t sum_long = 0;
  highbd_variance64(src8_ptr, src_stride, ref8_ptr, ref_stride, w, h, &sse_long,
                    &sum_long);
  *sse = (uint32_t)sse_long;
  *sum = (int)sum_long;
}
288 | | |
// 10-bit-depth wrapper around highbd_variance64: rounds the accumulators
// down to the 8-bit scale (sse by 2*2 bits = 4, sum by 2 bits).
static void highbd_10_variance(const uint8_t *src8_ptr, int src_stride,
                               const uint8_t *ref8_ptr, int ref_stride, int w,
                               int h, uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  int64_t sum_long = 0;
  highbd_variance64(src8_ptr, src_stride, ref8_ptr, ref_stride, w, h, &sse_long,
                    &sum_long);
  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 4);
  *sum = (int)ROUND_POWER_OF_TWO(sum_long, 2);
}
299 | | |
// 12-bit-depth wrapper around highbd_variance64: rounds the accumulators
// down to the 8-bit scale (sse by 2*4 bits = 8, sum by 4 bits).
static void highbd_12_variance(const uint8_t *src8_ptr, int src_stride,
                               const uint8_t *ref8_ptr, int ref_stride, int w,
                               int h, uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  int64_t sum_long = 0;
  highbd_variance64(src8_ptr, src_stride, ref8_ptr, ref_stride, w, h, &sse_long,
                    &sum_long);
  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 8);
  *sum = (int)ROUND_POWER_OF_TWO(sum_long, 4);
}
310 | | |
// Defines the three bitdepth-specific variance functions for a W x H block:
// vpx_highbd_{8,10,12}_varianceWxH_c. Each returns sse - sum^2 / (W * H).
// The 10/12-bit versions clamp to 0 because the rounding applied in
// highbd_10/12_variance can make the subtraction go slightly negative.
#define HIGHBD_VAR(W, H)                                                    \
  uint32_t vpx_highbd_8_variance##W##x##H##_c(                              \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,       \
      int ref_stride, uint32_t *sse) {                                      \
    int sum;                                                                \
    highbd_8_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse,  \
                      &sum);                                                \
    return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H));               \
  }                                                                         \
                                                                            \
  uint32_t vpx_highbd_10_variance##W##x##H##_c(                             \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,       \
      int ref_stride, uint32_t *sse) {                                      \
    int sum;                                                                \
    int64_t var;                                                            \
    highbd_10_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, \
                       &sum);                                               \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));               \
    return (var >= 0) ? (uint32_t)var : 0;                                  \
  }                                                                         \
                                                                            \
  uint32_t vpx_highbd_12_variance##W##x##H##_c(                             \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,       \
      int ref_stride, uint32_t *sse) {                                      \
    int sum;                                                                \
    int64_t var;                                                            \
    highbd_12_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, \
                       &sum);                                               \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));               \
    return (var >= 0) ? (uint32_t)var : 0;                                  \
  }
342 | | |
// Defines vpx_highbd_{8,10,12}_getSxSvar_c: like the variance functions but
// returns both sse and sum through out-parameters instead of computing
// sse - sum^2 / (S * S).
#define HIGHBD_GET_VAR(S)                                                    \
  void vpx_highbd_8_get##S##x##S##var_c(                                     \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,        \
      int ref_stride, uint32_t *sse, int *sum) {                             \
    highbd_8_variance(src_ptr, src_stride, ref_ptr, ref_stride, S, S, sse,   \
                      sum);                                                  \
  }                                                                          \
                                                                             \
  void vpx_highbd_10_get##S##x##S##var_c(                                    \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,        \
      int ref_stride, uint32_t *sse, int *sum) {                             \
    highbd_10_variance(src_ptr, src_stride, ref_ptr, ref_stride, S, S, sse,  \
                       sum);                                                 \
  }                                                                          \
                                                                             \
  void vpx_highbd_12_get##S##x##S##var_c(                                    \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,        \
      int ref_stride, uint32_t *sse, int *sum) {                             \
    highbd_12_variance(src_ptr, src_stride, ref_ptr, ref_stride, S, S, sse,  \
                       sum);                                                 \
  }
364 | | |
// Defines vpx_highbd_{8,10,12}_mseWxH_c: mean-squared-error only. The sum
// is computed by the shared helper but intentionally discarded.
#define HIGHBD_MSE(W, H)                                                    \
  uint32_t vpx_highbd_8_mse##W##x##H##_c(                                   \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,       \
      int ref_stride, uint32_t *sse) {                                      \
    int sum;                                                                \
    highbd_8_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse,  \
                      &sum);                                                \
    return *sse;                                                            \
  }                                                                         \
                                                                            \
  uint32_t vpx_highbd_10_mse##W##x##H##_c(                                  \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,       \
      int ref_stride, uint32_t *sse) {                                      \
    int sum;                                                                \
    highbd_10_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, \
                       &sum);                                               \
    return *sse;                                                            \
  }                                                                         \
                                                                            \
  uint32_t vpx_highbd_12_mse##W##x##H##_c(                                  \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,       \
      int ref_stride, uint32_t *sse) {                                      \
    int sum;                                                                \
    highbd_12_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, \
                       &sum);                                               \
    return *sse;                                                            \
  }
392 | | |
// High-bitdepth counterpart of var_filter_block2d_bil_first_pass: applies a
// 1-D 2-tap bilinear filter (pixel_step = 1 horizontal, = stride vertical)
// to 16-bit samples addressed through a packed pointer. Output remains
// 16-bit for the second pass.
static void highbd_var_filter_block2d_bil_first_pass(
    const uint8_t *src_ptr8, uint16_t *output_ptr,
    unsigned int src_pixels_per_line, int pixel_step,
    unsigned int output_height, unsigned int output_width,
    const uint8_t *filter) {
  unsigned int i, j;
  uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      // Weighted average of the two taps, rounded to FILTER_BITS precision.
      output_ptr[j] = ROUND_POWER_OF_TWO(
          (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
          FILTER_BITS);

      ++src_ptr;
    }

    // Next row...
    src_ptr += src_pixels_per_line - output_width;
    output_ptr += output_width;
  }
}
414 | | |
// High-bitdepth counterpart of var_filter_block2d_bil_second_pass: consumes
// the 16-bit first-pass output and produces 16-bit (not 8-bit) output,
// since high-bitdepth pixels do not fit in a byte.
static void highbd_var_filter_block2d_bil_second_pass(
    const uint16_t *src_ptr, uint16_t *output_ptr,
    unsigned int src_pixels_per_line, unsigned int pixel_step,
    unsigned int output_height, unsigned int output_width,
    const uint8_t *filter) {
  unsigned int i, j;

  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      // Weighted average of the two taps, rounded to FILTER_BITS precision.
      output_ptr[j] = ROUND_POWER_OF_TWO(
          (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
          FILTER_BITS);
      ++src_ptr;
    }

    // Advance to the next row of input and output.
    src_ptr += src_pixels_per_line - output_width;
    output_ptr += output_width;
  }
}
434 | | |
// Defines vpx_highbd_{8,10,12}_sub_pixel_varianceWxH_c: bilinearly
// interpolates the high-bitdepth source block at the (x_offset, y_offset)
// eighth-pel position (horizontal then vertical pass), then computes the
// bitdepth-appropriate variance against the reference. temp2 holds raw
// uint16_t samples, so it is re-packed with CONVERT_TO_BYTEPTR before the
// variance call.
#define HIGHBD_SUBPIX_VAR(W, H)                                              \
  uint32_t vpx_highbd_8_sub_pixel_variance##W##x##H##_c(                     \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,    \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) {               \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
                                                                             \
    highbd_var_filter_block2d_bil_first_pass(                                \
        src_ptr, fdata3, src_stride, 1, H + 1, W, bilinear_filters[x_offset]); \
    highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,     \
                                              bilinear_filters[y_offset]);   \
                                                                             \
    return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W,  \
                                              ref_ptr, ref_stride, sse);     \
  }                                                                          \
                                                                             \
  uint32_t vpx_highbd_10_sub_pixel_variance##W##x##H##_c(                    \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,    \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) {               \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
                                                                             \
    highbd_var_filter_block2d_bil_first_pass(                                \
        src_ptr, fdata3, src_stride, 1, H + 1, W, bilinear_filters[x_offset]); \
    highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,     \
                                              bilinear_filters[y_offset]);   \
                                                                             \
    return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \
                                               ref_ptr, ref_stride, sse);    \
  }                                                                          \
                                                                             \
  uint32_t vpx_highbd_12_sub_pixel_variance##W##x##H##_c(                    \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,    \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) {               \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
                                                                             \
    highbd_var_filter_block2d_bil_first_pass(                                \
        src_ptr, fdata3, src_stride, 1, H + 1, W, bilinear_filters[x_offset]); \
    highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,     \
                                              bilinear_filters[y_offset]);   \
                                                                             \
    return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \
                                               ref_ptr, ref_stride, sse);    \
  }
480 | | |
// Defines vpx_highbd_{8,10,12}_sub_pixel_avg_varianceWxH_c: like
// HIGHBD_SUBPIX_VAR, but the interpolated block is first averaged with
// second_pred (compound prediction) before the variance is computed.
// temp3 is 16-byte aligned to match the expectations of SIMD variants.
#define HIGHBD_SUBPIX_AVG_VAR(W, H)                                          \
  uint32_t vpx_highbd_8_sub_pixel_avg_variance##W##x##H##_c(                 \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,    \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse,                 \
      const uint8_t *second_pred) {                                          \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    highbd_var_filter_block2d_bil_first_pass(                                \
        src_ptr, fdata3, src_stride, 1, H + 1, W, bilinear_filters[x_offset]); \
    highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,     \
                                              bilinear_filters[y_offset]);   \
                                                                             \
    vpx_highbd_comp_avg_pred_c(temp3, CONVERT_TO_SHORTPTR(second_pred), W, H, \
                               temp2, W);                                    \
                                                                             \
    return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W,  \
                                              ref_ptr, ref_stride, sse);     \
  }                                                                          \
                                                                             \
  uint32_t vpx_highbd_10_sub_pixel_avg_variance##W##x##H##_c(                \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,    \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse,                 \
      const uint8_t *second_pred) {                                          \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    highbd_var_filter_block2d_bil_first_pass(                                \
        src_ptr, fdata3, src_stride, 1, H + 1, W, bilinear_filters[x_offset]); \
    highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,     \
                                              bilinear_filters[y_offset]);   \
                                                                             \
    vpx_highbd_comp_avg_pred_c(temp3, CONVERT_TO_SHORTPTR(second_pred), W, H, \
                               temp2, W);                                    \
                                                                             \
    return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
                                               ref_ptr, ref_stride, sse);    \
  }                                                                          \
                                                                             \
  uint32_t vpx_highbd_12_sub_pixel_avg_variance##W##x##H##_c(                \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,    \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse,                 \
      const uint8_t *second_pred) {                                          \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    highbd_var_filter_block2d_bil_first_pass(                                \
        src_ptr, fdata3, src_stride, 1, H + 1, W, bilinear_filters[x_offset]); \
    highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,     \
                                              bilinear_filters[y_offset]);   \
                                                                             \
    vpx_highbd_comp_avg_pred_c(temp3, CONVERT_TO_SHORTPTR(second_pred), W, H, \
                               temp2, W);                                    \
                                                                             \
    return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
                                               ref_ptr, ref_stride, sse);    \
  }
541 | | |
/* All three forms of the variance are available in the same sizes. */
#define HIGHBD_VARIANCES(W, H) \
  HIGHBD_VAR(W, H)             \
  HIGHBD_SUBPIX_VAR(W, H)      \
  HIGHBD_SUBPIX_AVG_VAR(W, H)

// Instantiate the high-bitdepth plain, sub-pixel, and sub-pixel-averaging
// variance functions for every block size used by the codec.
HIGHBD_VARIANCES(64, 64)
HIGHBD_VARIANCES(64, 32)
HIGHBD_VARIANCES(32, 64)
HIGHBD_VARIANCES(32, 32)
HIGHBD_VARIANCES(32, 16)
HIGHBD_VARIANCES(16, 32)
HIGHBD_VARIANCES(16, 16)
HIGHBD_VARIANCES(16, 8)
HIGHBD_VARIANCES(8, 16)
HIGHBD_VARIANCES(8, 8)
HIGHBD_VARIANCES(8, 4)
HIGHBD_VARIANCES(4, 8)
HIGHBD_VARIANCES(4, 4)

// sse/sum accessors and plain MSE only exist for the sizes callers need.
HIGHBD_GET_VAR(8)
HIGHBD_GET_VAR(16)

HIGHBD_MSE(16, 16)
HIGHBD_MSE(16, 8)
HIGHBD_MSE(8, 16)
HIGHBD_MSE(8, 8)
569 | | |
// Writes the pixelwise rounded average of the 16-bit `pred` (stride = width)
// and `ref` (stride = ref_stride) blocks into `comp_pred` (stride = width).
void vpx_highbd_comp_avg_pred_c(uint16_t *comp_pred, const uint16_t *pred,
                                int width, int height, const uint16_t *ref,
                                int ref_stride) {
  int row, col;
  for (row = 0; row < height; ++row) {
    for (col = 0; col < width; ++col) {
      // Rounded average: equivalent to ROUND_POWER_OF_TWO(pred + ref, 1).
      comp_pred[col] = (uint16_t)((pred[col] + ref[col] + 1) >> 1);
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}
584 | | #endif // CONFIG_VP9_HIGHBITDEPTH |