/src/libvpx/vpx_dsp/variance.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 | | * |
4 | | * Use of this source code is governed by a BSD-style license |
5 | | * that can be found in the LICENSE file in the root of the source |
6 | | * tree. An additional intellectual property rights grant can be found |
7 | | * in the file PATENTS. All contributing project authors may |
8 | | * be found in the AUTHORS file in the root of the source tree. |
9 | | */ |
10 | | |
11 | | #include "./vpx_config.h" |
12 | | #include "./vpx_dsp_rtcd.h" |
13 | | |
14 | | #include "vpx_ports/mem.h" |
15 | | #include "vpx/vpx_integer.h" |
16 | | |
17 | | #include "vpx_dsp/variance.h" |
18 | | |
/* Two-tap bilinear filter coefficients. Row k holds { 128 - 16 * k, 16 * k },
 * so every pair of taps sums to 128. The sub-pixel variance functions in this
 * file index the table with their x_offset / y_offset arguments.
 */
static const uint8_t bilinear_filters[8][2] = {
  { 128, 0 },  /* offset 0 */
  { 112, 16 }, /* offset 1 */
  { 96, 32 },  /* offset 2 */
  { 80, 48 },  /* offset 3 */
  { 64, 64 },  /* offset 4 */
  { 48, 80 },  /* offset 5 */
  { 32, 96 },  /* offset 6 */
  { 16, 112 }, /* offset 7 */
};
23 | | |
/* Returns the sum of squared differences between a 4x4 block of src_ptr and
 * the co-located 4x4 block of ref_ptr. src_stride and ref_stride are the
 * distances, in bytes, between the starts of consecutive rows.
 */
uint32_t vpx_get4x4sse_cs_c(const uint8_t *src_ptr, int src_stride,
                            const uint8_t *ref_ptr, int ref_stride) {
  const uint8_t *src_row = src_ptr;
  const uint8_t *ref_row = ref_ptr;
  int sq_err_total = 0;
  int row;

  for (row = 0; row < 4; ++row) {
    int col;

    for (col = 0; col < 4; ++col) {
      const int delta = src_row[col] - ref_row[col];
      sq_err_total += delta * delta;
    }

    /* Step both blocks down to their next row. */
    src_row += src_stride;
    ref_row += ref_stride;
  }

  return sq_err_total;
}
41 | | |
/* Returns the sum of the squares of the 256 consecutive int16_t values
 * starting at src_ptr. The total is accumulated in an unsigned int.
 */
uint32_t vpx_get_mb_ss_c(const int16_t *src_ptr) {
  const int16_t *const end = src_ptr + 256;
  const int16_t *cursor = src_ptr;
  unsigned int total = 0;

  while (cursor < end) {
    const int value = *cursor++;
    total += value * value;
  }

  return total;
}
51 | | |
/* Accumulates, over a w x h block, the sum of (src - ref) differences into
 * *sum and the sum of squared differences into *sse. Both outputs are reset
 * to zero before any pixel is read. src_stride and ref_stride are the
 * distances between the starts of consecutive rows.
 */
static void variance(const uint8_t *src_ptr, int src_stride,
                     const uint8_t *ref_ptr, int ref_stride, int w, int h,
                     uint32_t *sse, int *sum) {
  const uint8_t *src_row = src_ptr;
  const uint8_t *ref_row = ref_ptr;
  int row;

  *sum = 0;
  *sse = 0;

  for (row = 0; row < h; ++row) {
    int col;

    for (col = 0; col < w; ++col) {
      const int delta = src_row[col] - ref_row[col];
      *sum += delta;
      *sse += delta * delta;
    }

    src_row += src_stride;
    ref_row += ref_stride;
  }
}
71 | | |
72 | | // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal |
73 | | // or vertical direction to produce the filtered output block. Used to implement |
74 | | // the first-pass of 2-D separable filter. |
75 | | // |
76 | | // Produces int16_t output to retain precision for the next pass. Two filter |
77 | | // taps should sum to FILTER_WEIGHT. pixel_step defines whether the filter is |
78 | | // applied horizontally (pixel_step = 1) or vertically (pixel_step = stride). |
79 | | // It defines the offset required to move from one input to the next. |
80 | | static void var_filter_block2d_bil_first_pass( |
81 | | const uint8_t *src_ptr, uint16_t *ref_ptr, unsigned int src_pixels_per_line, |
82 | | int pixel_step, unsigned int output_height, unsigned int output_width, |
83 | 0 | const uint8_t *filter) { |
84 | 0 | unsigned int i, j; |
85 | |
|
86 | 0 | for (i = 0; i < output_height; ++i) { |
87 | 0 | for (j = 0; j < output_width; ++j) { |
88 | 0 | ref_ptr[j] = ROUND_POWER_OF_TWO( |
89 | 0 | (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1], |
90 | 0 | FILTER_BITS); |
91 | |
|
92 | 0 | ++src_ptr; |
93 | 0 | } |
94 | |
|
95 | 0 | src_ptr += src_pixels_per_line - output_width; |
96 | 0 | ref_ptr += output_width; |
97 | 0 | } |
98 | 0 | } |
99 | | |
100 | | // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal |
101 | | // or vertical direction to produce the filtered output block. Used to implement |
102 | | // the second-pass of 2-D separable filter. |
103 | | // |
104 | | // Requires 16-bit input as produced by filter_block2d_bil_first_pass. Two |
105 | | // filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the |
106 | | // filter is applied horizontally (pixel_step = 1) or vertically |
107 | | // (pixel_step = stride). It defines the offset required to move from one input |
108 | | // to the next. Output is 8-bit. |
109 | | static void var_filter_block2d_bil_second_pass( |
110 | | const uint16_t *src_ptr, uint8_t *ref_ptr, unsigned int src_pixels_per_line, |
111 | | unsigned int pixel_step, unsigned int output_height, |
112 | 0 | unsigned int output_width, const uint8_t *filter) { |
113 | 0 | unsigned int i, j; |
114 | |
|
115 | 0 | for (i = 0; i < output_height; ++i) { |
116 | 0 | for (j = 0; j < output_width; ++j) { |
117 | 0 | ref_ptr[j] = ROUND_POWER_OF_TWO( |
118 | 0 | (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1], |
119 | 0 | FILTER_BITS); |
120 | 0 | ++src_ptr; |
121 | 0 | } |
122 | |
|
123 | 0 | src_ptr += src_pixels_per_line - output_width; |
124 | 0 | ref_ptr += output_width; |
125 | 0 | } |
126 | 0 | } |
127 | | |
/* Defines vpx_variance<W>x<H>_c: stores the sum of squared differences of the
 * W x H block in *sse and returns *sse - sum^2 / (W * H), where sum is the sum
 * of differences. The sum^2 product is formed in 64 bits.
 */
#define VAR(W, H)                                                              \
  uint32_t vpx_variance##W##x##H##_c(const uint8_t *src_ptr, int src_stride,   \
                                     const uint8_t *ref_ptr, int ref_stride,   \
                                     uint32_t *sse) {                          \
    int diff_sum;                                                              \
    int64_t sum_sq_over_n;                                                     \
    variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, &diff_sum);  \
    sum_sq_over_n = ((int64_t)diff_sum * diff_sum) / (W * H);                  \
    return *sse - (uint32_t)sum_sq_over_n;                                     \
  }
136 | | |
/* Defines vpx_sub_pixel_variance<W>x<H>_c: bilinearly filters the source
 * block (horizontal taps chosen by x_offset, vertical taps by y_offset) and
 * returns the variance of the filtered W x H block against ref_ptr. The
 * horizontal pass produces H + 1 rows so the vertical pass has the extra row
 * it reads.
 */
#define SUBPIX_VAR(W, H)                                                       \
  uint32_t vpx_sub_pixel_variance##W##x##H##_c(                                \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,      \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) {                 \
    const uint8_t *const horiz_taps = bilinear_filters[x_offset];              \
    const uint8_t *const vert_taps = bilinear_filters[y_offset];               \
    uint16_t horiz_out[(H + 1) * W];                                           \
    uint8_t filtered[H * W];                                                   \
                                                                               \
    var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_stride, 1,       \
                                      H + 1, W, horiz_taps);                   \
    var_filter_block2d_bil_second_pass(horiz_out, filtered, W, W, H, W,        \
                                       vert_taps);                             \
                                                                               \
    return vpx_variance##W##x##H##_c(filtered, W, ref_ptr, ref_stride, sse);   \
  }
151 | | |
/* Defines vpx_sub_pixel_avg_variance<W>x<H>_c: bilinearly filters the source
 * block (horizontal taps chosen by x_offset, vertical taps by y_offset),
 * averages the filtered block with second_pred via vpx_comp_avg_pred_c, and
 * returns the variance of that average against ref_ptr.
 */
#define SUBPIX_AVG_VAR(W, H)                                                   \
  uint32_t vpx_sub_pixel_avg_variance##W##x##H##_c(                            \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,      \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse,                   \
      const uint8_t *second_pred) {                                            \
    const uint8_t *const horiz_taps = bilinear_filters[x_offset];              \
    const uint8_t *const vert_taps = bilinear_filters[y_offset];               \
    uint16_t horiz_out[(H + 1) * W];                                           \
    uint8_t filtered[H * W];                                                   \
    DECLARE_ALIGNED(32, uint8_t, averaged[H * W]);                             \
                                                                               \
    var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_stride, 1,       \
                                      H + 1, W, horiz_taps);                   \
    var_filter_block2d_bil_second_pass(horiz_out, filtered, W, W, H, W,        \
                                       vert_taps);                             \
                                                                               \
    vpx_comp_avg_pred_c(averaged, second_pred, W, H, filtered, W);             \
                                                                               \
    return vpx_variance##W##x##H##_c(averaged, W, ref_ptr, ref_stride, sse);   \
  }
170 | | |
/* Defines vpx_get<W>x<H>var_c. Unlike the vpx_variance functions it returns
 * nothing: the sum of squared differences is written to *sse and the sum of
 * differences to *sum, leaving sse - sum^2 / (w * h) for the caller to
 * compute.
 */
#define GET_VAR(W, H)                                                          \
  void vpx_get##W##x##H##var_c(const uint8_t *src_ptr, int src_stride,         \
                               const uint8_t *ref_ptr, int ref_stride,         \
                               uint32_t *sse, int *sum) {                      \
    variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, sum);        \
  }
181 | | |
/* Defines vpx_mse<W>x<H>_c. Same computation as the vpx_variance functions,
 * but the sum^2 / (w * h) term is not subtracted: the sum of squared
 * differences is stored in *sse and also returned. The sum of differences is
 * computed and discarded.
 */
#define MSE(W, H)                                                              \
  uint32_t vpx_mse##W##x##H##_c(const uint8_t *src_ptr, int src_stride,        \
                                const uint8_t *ref_ptr, int ref_stride,        \
                                uint32_t *sse) {                               \
    int unused_sum;                                                            \
    variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse,              \
             &unused_sum);                                                     \
    return *sse;                                                               \
  }
194 | | |
/* All three forms of the variance are available in the same sizes: for a
 * given W x H this expands to the plain variance, the sub-pixel variance and
 * the sub-pixel average variance definitions.
 */
#define VARIANCES(W, H) \
  VAR(W, H)             \
  SUBPIX_VAR(W, H)      \
  SUBPIX_AVG_VAR(W, H)
200 | | |
/* Variance, sub-pixel variance and sub-pixel average variance for every
 * W x H block size listed below.
 */
VARIANCES(64, 64)
VARIANCES(64, 32)
VARIANCES(32, 64)
VARIANCES(32, 32)
VARIANCES(32, 16)
VARIANCES(16, 32)
VARIANCES(16, 16)
VARIANCES(16, 8)
VARIANCES(8, 16)
VARIANCES(8, 8)
VARIANCES(8, 4)
VARIANCES(4, 8)
VARIANCES(4, 4)

/* vpx_get16x16var_c and vpx_get8x8var_c. */
GET_VAR(16, 16)
GET_VAR(8, 8)

/* vpx_mse16x16_c, vpx_mse16x8_c, vpx_mse8x16_c and vpx_mse8x8_c. */
MSE(16, 16)
MSE(16, 8)
MSE(8, 16)
MSE(8, 8)
222 | | |
/* Writes the rounded average of pred and ref to comp_pred for a
 * width x height block. comp_pred and pred advance by width per row; ref
 * advances by ref_stride per row.
 */
void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
                         int height, const uint8_t *ref, int ref_stride) {
  int row;

  for (row = 0; row < height; ++row) {
    int col;

    for (col = 0; col < width; ++col) {
      const int pair_sum = pred[col] + ref[col];
      comp_pred[col] = ROUND_POWER_OF_TWO(pair_sum, 1);
    }

    /* Only ref has its own stride; the other two buffers are packed. */
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}
237 | | |
238 | | #if CONFIG_VP9_HIGHBITDEPTH |
/* Accumulates, over a w x h block of 16-bit samples, the sum of (src - ref)
 * differences into *sum and the sum of squared differences into *sse, both in
 * 64 bits. src8_ptr and ref8_ptr are converted with CONVERT_TO_SHORTPTR before
 * use; the strides are applied to the resulting uint16_t pointers. Both
 * outputs are reset to zero before any sample is read.
 */
static void highbd_variance64(const uint8_t *src8_ptr, int src_stride,
                              const uint8_t *ref8_ptr, int ref_stride, int w,
                              int h, uint64_t *sse, int64_t *sum) {
  int i, j;

  uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src8_ptr);
  uint16_t *ref_ptr = CONVERT_TO_SHORTPTR(ref8_ptr);
  *sum = 0;
  *sse = 0;

  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int diff = src_ptr[j] - ref_ptr[j];
      *sum += diff;
      /* Square in 64 bits: two uint16_t samples can differ by up to 65535,
       * and that value squared does not fit in a 32-bit int. */
      *sse += (uint64_t)((int64_t)diff * diff);
    }
    src_ptr += src_stride;
    ref_ptr += ref_stride;
  }
}
259 | | |
/* Runs highbd_variance64 on the block and narrows its 64-bit totals without
 * any scaling: *sse receives the total truncated to uint32_t and *sum the
 * total truncated to int.
 */
static void highbd_8_variance(const uint8_t *src8_ptr, int src_stride,
                              const uint8_t *ref8_ptr, int ref_stride, int w,
                              int h, uint32_t *sse, int *sum) {
  uint64_t wide_sse = 0;
  int64_t wide_sum = 0;

  highbd_variance64(src8_ptr, src_stride, ref8_ptr, ref_stride, w, h,
                    &wide_sse, &wide_sum);

  *sse = (uint32_t)wide_sse;
  *sum = (int)wide_sum;
}
270 | | |
/* Runs highbd_variance64 on the block and scales its 64-bit totals down with
 * rounding before narrowing: the squared-difference total by 4 bits and the
 * difference total by 2 bits.
 * NOTE(review): presumably this brings 10-bit statistics onto the 8-bit
 * scale; confirm against callers.
 */
static void highbd_10_variance(const uint8_t *src8_ptr, int src_stride,
                               const uint8_t *ref8_ptr, int ref_stride, int w,
                               int h, uint32_t *sse, int *sum) {
  uint64_t wide_sse = 0;
  int64_t wide_sum = 0;

  highbd_variance64(src8_ptr, src_stride, ref8_ptr, ref_stride, w, h,
                    &wide_sse, &wide_sum);

  *sse = (uint32_t)ROUND_POWER_OF_TWO(wide_sse, 4);
  *sum = (int)ROUND_POWER_OF_TWO(wide_sum, 2);
}
281 | | |
/* Runs highbd_variance64 on the block and scales its 64-bit totals down with
 * rounding before narrowing: the squared-difference total by 8 bits and the
 * difference total by 4 bits.
 * NOTE(review): presumably this brings 12-bit statistics onto the 8-bit
 * scale; confirm against callers.
 */
static void highbd_12_variance(const uint8_t *src8_ptr, int src_stride,
                               const uint8_t *ref8_ptr, int ref_stride, int w,
                               int h, uint32_t *sse, int *sum) {
  uint64_t wide_sse = 0;
  int64_t wide_sum = 0;

  highbd_variance64(src8_ptr, src_stride, ref8_ptr, ref_stride, w, h,
                    &wide_sse, &wide_sum);

  *sse = (uint32_t)ROUND_POWER_OF_TWO(wide_sse, 8);
  *sum = (int)ROUND_POWER_OF_TWO(wide_sum, 4);
}
292 | | |
/* Defines vpx_highbd_{8,10,12}_variance<W>x<H>_c. Each stores the sum of
 * squared differences from the matching highbd_*_variance helper in *sse and
 * returns *sse - sum^2 / (W * H). The 10- and 12-bit variants do the
 * subtraction in 64 bits and return 0 when the result is negative; the 8-bit
 * variant subtracts in uint32_t without that clamp.
 */
#define HIGHBD_VAR(W, H)                                                       \
  uint32_t vpx_highbd_8_variance##W##x##H##_c(                                 \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,          \
      int ref_stride, uint32_t *sse) {                                         \
    int diff_sum;                                                              \
    int64_t sum_sq_over_n;                                                     \
    highbd_8_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse,     \
                      &diff_sum);                                              \
    sum_sq_over_n = ((int64_t)diff_sum * diff_sum) / (W * H);                  \
    return *sse - (uint32_t)sum_sq_over_n;                                     \
  }                                                                            \
                                                                               \
  uint32_t vpx_highbd_10_variance##W##x##H##_c(                                \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,          \
      int ref_stride, uint32_t *sse) {                                         \
    int diff_sum;                                                              \
    int64_t var;                                                               \
    highbd_10_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse,    \
                       &diff_sum);                                             \
    var = (int64_t)(*sse) - (((int64_t)diff_sum * diff_sum) / (W * H));        \
    if (var < 0) return 0;                                                     \
    return (uint32_t)var;                                                      \
  }                                                                            \
                                                                               \
  uint32_t vpx_highbd_12_variance##W##x##H##_c(                                \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,          \
      int ref_stride, uint32_t *sse) {                                         \
    int diff_sum;                                                              \
    int64_t var;                                                               \
    highbd_12_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse,    \
                       &diff_sum);                                             \
    var = (int64_t)(*sse) - (((int64_t)diff_sum * diff_sum) / (W * H));        \
    if (var < 0) return 0;                                                     \
    return (uint32_t)var;                                                      \
  }
324 | | |
/* Defines vpx_highbd_{8,10,12}_get<S>x<S>var_c for a square S x S block.
 * Each forwards to the matching highbd_*_variance helper, which writes the
 * sum of squared differences to *sse and the sum of differences to *sum.
 */
#define HIGHBD_GET_VAR(S)                                                      \
  void vpx_highbd_8_get##S##x##S##var_c(                                       \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,          \
      int ref_stride, uint32_t *sse, int *sum) {                               \
    highbd_8_variance(src_ptr, src_stride, ref_ptr, ref_stride, S, S, sse,     \
                      sum);                                                    \
  }                                                                            \
                                                                               \
  void vpx_highbd_10_get##S##x##S##var_c(                                      \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,          \
      int ref_stride, uint32_t *sse, int *sum) {                               \
    highbd_10_variance(src_ptr, src_stride, ref_ptr, ref_stride, S, S, sse,    \
                       sum);                                                   \
  }                                                                            \
                                                                               \
  void vpx_highbd_12_get##S##x##S##var_c(                                      \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,          \
      int ref_stride, uint32_t *sse, int *sum) {                               \
    highbd_12_variance(src_ptr, src_stride, ref_ptr, ref_stride, S, S, sse,    \
                       sum);                                                   \
  }
346 | | |
/* Defines vpx_highbd_{8,10,12}_mse<W>x<H>_c. Each stores the sum of squared
 * differences from the matching highbd_*_variance helper in *sse and returns
 * that same value; the sum of differences is computed and discarded.
 */
#define HIGHBD_MSE(W, H)                                                       \
  uint32_t vpx_highbd_8_mse##W##x##H##_c(                                      \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,          \
      int ref_stride, uint32_t *sse) {                                         \
    int unused_sum;                                                            \
    highbd_8_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse,     \
                      &unused_sum);                                            \
    return *sse;                                                               \
  }                                                                            \
                                                                               \
  uint32_t vpx_highbd_10_mse##W##x##H##_c(                                     \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,          \
      int ref_stride, uint32_t *sse) {                                         \
    int unused_sum;                                                            \
    highbd_10_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse,    \
                       &unused_sum);                                           \
    return *sse;                                                               \
  }                                                                            \
                                                                               \
  uint32_t vpx_highbd_12_mse##W##x##H##_c(                                     \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,          \
      int ref_stride, uint32_t *sse) {                                         \
    int unused_sum;                                                            \
    highbd_12_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse,    \
                       &unused_sum);                                           \
    return *sse;                                                               \
  }
374 | | |
375 | | static void highbd_var_filter_block2d_bil_first_pass( |
376 | | const uint8_t *src_ptr8, uint16_t *output_ptr, |
377 | | unsigned int src_pixels_per_line, int pixel_step, |
378 | | unsigned int output_height, unsigned int output_width, |
379 | 0 | const uint8_t *filter) { |
380 | 0 | unsigned int i, j; |
381 | 0 | uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8); |
382 | 0 | for (i = 0; i < output_height; ++i) { |
383 | 0 | for (j = 0; j < output_width; ++j) { |
384 | 0 | output_ptr[j] = ROUND_POWER_OF_TWO( |
385 | 0 | (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1], |
386 | 0 | FILTER_BITS); |
387 | |
|
388 | 0 | ++src_ptr; |
389 | 0 | } |
390 | | |
391 | | // Next row... |
392 | 0 | src_ptr += src_pixels_per_line - output_width; |
393 | 0 | output_ptr += output_width; |
394 | 0 | } |
395 | 0 | } |
396 | | |
397 | | static void highbd_var_filter_block2d_bil_second_pass( |
398 | | const uint16_t *src_ptr, uint16_t *output_ptr, |
399 | | unsigned int src_pixels_per_line, unsigned int pixel_step, |
400 | | unsigned int output_height, unsigned int output_width, |
401 | 0 | const uint8_t *filter) { |
402 | 0 | unsigned int i, j; |
403 | |
|
404 | 0 | for (i = 0; i < output_height; ++i) { |
405 | 0 | for (j = 0; j < output_width; ++j) { |
406 | 0 | output_ptr[j] = ROUND_POWER_OF_TWO( |
407 | 0 | (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1], |
408 | 0 | FILTER_BITS); |
409 | 0 | ++src_ptr; |
410 | 0 | } |
411 | |
|
412 | 0 | src_ptr += src_pixels_per_line - output_width; |
413 | 0 | output_ptr += output_width; |
414 | 0 | } |
415 | 0 | } |
416 | | |
/* Defines vpx_highbd_{8,10,12}_sub_pixel_variance<W>x<H>_c. Each bilinearly
 * filters the source block (horizontal taps chosen by x_offset, vertical taps
 * by y_offset) into a 16-bit W x H buffer and returns the matching
 * vpx_highbd_*_variance of that buffer against ref_ptr. The horizontal pass
 * produces H + 1 rows so the vertical pass has the extra row it reads.
 */
#define HIGHBD_SUBPIX_VAR(W, H)                                                \
  uint32_t vpx_highbd_8_sub_pixel_variance##W##x##H##_c(                       \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,      \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) {                 \
    const uint8_t *const horiz_taps = bilinear_filters[x_offset];              \
    const uint8_t *const vert_taps = bilinear_filters[y_offset];               \
    uint16_t horiz_out[(H + 1) * W];                                           \
    uint16_t filtered[H * W];                                                  \
                                                                               \
    highbd_var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_stride,   \
                                             1, H + 1, W, horiz_taps);         \
    highbd_var_filter_block2d_bil_second_pass(horiz_out, filtered, W, W, H,    \
                                              W, vert_taps);                   \
                                                                               \
    return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(filtered),    \
                                              W, ref_ptr, ref_stride, sse);    \
  }                                                                            \
                                                                               \
  uint32_t vpx_highbd_10_sub_pixel_variance##W##x##H##_c(                      \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,      \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) {                 \
    const uint8_t *const horiz_taps = bilinear_filters[x_offset];              \
    const uint8_t *const vert_taps = bilinear_filters[y_offset];               \
    uint16_t horiz_out[(H + 1) * W];                                           \
    uint16_t filtered[H * W];                                                  \
                                                                               \
    highbd_var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_stride,   \
                                             1, H + 1, W, horiz_taps);         \
    highbd_var_filter_block2d_bil_second_pass(horiz_out, filtered, W, W, H,    \
                                              W, vert_taps);                   \
                                                                               \
    return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(filtered),   \
                                               W, ref_ptr, ref_stride, sse);   \
  }                                                                            \
                                                                               \
  uint32_t vpx_highbd_12_sub_pixel_variance##W##x##H##_c(                      \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,      \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) {                 \
    const uint8_t *const horiz_taps = bilinear_filters[x_offset];              \
    const uint8_t *const vert_taps = bilinear_filters[y_offset];               \
    uint16_t horiz_out[(H + 1) * W];                                           \
    uint16_t filtered[H * W];                                                  \
                                                                               \
    highbd_var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_stride,   \
                                             1, H + 1, W, horiz_taps);         \
    highbd_var_filter_block2d_bil_second_pass(horiz_out, filtered, W, W, H,    \
                                              W, vert_taps);                   \
                                                                               \
    return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(filtered),   \
                                               W, ref_ptr, ref_stride, sse);   \
  }
462 | | |
/* Defines vpx_highbd_{8,10,12}_sub_pixel_avg_variance<W>x<H>_c. Each
 * bilinearly filters the source block (horizontal taps chosen by x_offset,
 * vertical taps by y_offset), averages the filtered block with second_pred
 * via vpx_highbd_comp_avg_pred_c, and returns the matching
 * vpx_highbd_*_variance of that average against ref_ptr.
 */
#define HIGHBD_SUBPIX_AVG_VAR(W, H)                                            \
  uint32_t vpx_highbd_8_sub_pixel_avg_variance##W##x##H##_c(                   \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,      \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse,                   \
      const uint8_t *second_pred) {                                            \
    const uint8_t *const horiz_taps = bilinear_filters[x_offset];              \
    const uint8_t *const vert_taps = bilinear_filters[y_offset];               \
    uint16_t horiz_out[(H + 1) * W];                                           \
    uint16_t filtered[H * W];                                                  \
    DECLARE_ALIGNED(16, uint16_t, averaged[H * W]);                            \
                                                                               \
    highbd_var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_stride,   \
                                             1, H + 1, W, horiz_taps);         \
    highbd_var_filter_block2d_bil_second_pass(horiz_out, filtered, W, W, H,    \
                                              W, vert_taps);                   \
                                                                               \
    vpx_highbd_comp_avg_pred_c(averaged, CONVERT_TO_SHORTPTR(second_pred), W,  \
                               H, filtered, W);                                \
                                                                               \
    return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(averaged),    \
                                              W, ref_ptr, ref_stride, sse);    \
  }                                                                            \
                                                                               \
  uint32_t vpx_highbd_10_sub_pixel_avg_variance##W##x##H##_c(                  \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,      \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse,                   \
      const uint8_t *second_pred) {                                            \
    const uint8_t *const horiz_taps = bilinear_filters[x_offset];              \
    const uint8_t *const vert_taps = bilinear_filters[y_offset];               \
    uint16_t horiz_out[(H + 1) * W];                                           \
    uint16_t filtered[H * W];                                                  \
    DECLARE_ALIGNED(16, uint16_t, averaged[H * W]);                            \
                                                                               \
    highbd_var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_stride,   \
                                             1, H + 1, W, horiz_taps);         \
    highbd_var_filter_block2d_bil_second_pass(horiz_out, filtered, W, W, H,    \
                                              W, vert_taps);                   \
                                                                               \
    vpx_highbd_comp_avg_pred_c(averaged, CONVERT_TO_SHORTPTR(second_pred), W,  \
                               H, filtered, W);                                \
                                                                               \
    return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(averaged),   \
                                               W, ref_ptr, ref_stride, sse);   \
  }                                                                            \
                                                                               \
  uint32_t vpx_highbd_12_sub_pixel_avg_variance##W##x##H##_c(                  \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,      \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse,                   \
      const uint8_t *second_pred) {                                            \
    const uint8_t *const horiz_taps = bilinear_filters[x_offset];              \
    const uint8_t *const vert_taps = bilinear_filters[y_offset];               \
    uint16_t horiz_out[(H + 1) * W];                                           \
    uint16_t filtered[H * W];                                                  \
    DECLARE_ALIGNED(16, uint16_t, averaged[H * W]);                            \
                                                                               \
    highbd_var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_stride,   \
                                             1, H + 1, W, horiz_taps);         \
    highbd_var_filter_block2d_bil_second_pass(horiz_out, filtered, W, W, H,    \
                                              W, vert_taps);                   \
                                                                               \
    vpx_highbd_comp_avg_pred_c(averaged, CONVERT_TO_SHORTPTR(second_pred), W,  \
                               H, filtered, W);                                \
                                                                               \
    return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(averaged),   \
                                               W, ref_ptr, ref_stride, sse);   \
  }
523 | | |
/* All three forms of the variance are available in the same sizes: for a
 * given W x H this expands to the high-bitdepth plain variance, sub-pixel
 * variance and sub-pixel average variance definitions (8-, 10- and 12-bit
 * variants of each).
 */
#define HIGHBD_VARIANCES(W, H) \
  HIGHBD_VAR(W, H)             \
  HIGHBD_SUBPIX_VAR(W, H)      \
  HIGHBD_SUBPIX_AVG_VAR(W, H)
529 | | |
/* High-bitdepth variance, sub-pixel variance and sub-pixel average variance
 * for every W x H block size listed below.
 */
HIGHBD_VARIANCES(64, 64)
HIGHBD_VARIANCES(64, 32)
HIGHBD_VARIANCES(32, 64)
HIGHBD_VARIANCES(32, 32)
HIGHBD_VARIANCES(32, 16)
HIGHBD_VARIANCES(16, 32)
HIGHBD_VARIANCES(16, 16)
HIGHBD_VARIANCES(16, 8)
HIGHBD_VARIANCES(8, 16)
HIGHBD_VARIANCES(8, 8)
HIGHBD_VARIANCES(8, 4)
HIGHBD_VARIANCES(4, 8)
HIGHBD_VARIANCES(4, 4)

/* vpx_highbd_{8,10,12}_get8x8var_c and vpx_highbd_{8,10,12}_get16x16var_c. */
HIGHBD_GET_VAR(8)
HIGHBD_GET_VAR(16)

/* vpx_highbd_{8,10,12}_mse for 16x16, 16x8, 8x16 and 8x8 blocks. */
HIGHBD_MSE(16, 16)
HIGHBD_MSE(16, 8)
HIGHBD_MSE(8, 16)
HIGHBD_MSE(8, 8)
551 | | |
/* Writes the rounded average of pred and ref to comp_pred for a
 * width x height block of 16-bit samples. comp_pred and pred advance by
 * width per row; ref advances by ref_stride per row.
 */
void vpx_highbd_comp_avg_pred_c(uint16_t *comp_pred, const uint16_t *pred,
                                int width, int height, const uint16_t *ref,
                                int ref_stride) {
  int row;

  for (row = 0; row < height; ++row) {
    int col;

    for (col = 0; col < width; ++col) {
      const int pair_sum = pred[col] + ref[col];
      comp_pred[col] = ROUND_POWER_OF_TWO(pair_sum, 1);
    }

    /* Only ref has its own stride; the other two buffers are packed. */
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}
566 | | #endif // CONFIG_VP9_HIGHBITDEPTH |