/src/aom/aom_dsp/variance.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2016, Alliance for Open Media. All rights reserved. |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
10 | | */ |
11 | | #include <assert.h> |
12 | | #include <stdlib.h> |
13 | | |
14 | | #include "config/aom_config.h" |
15 | | #include "config/aom_dsp_rtcd.h" |
16 | | |
17 | | #include "aom/aom_integer.h" |
18 | | #include "aom_ports/mem.h" |
19 | | |
20 | | #include "aom_dsp/aom_filter.h" |
21 | | #include "aom_dsp/blend.h" |
22 | | #include "aom_dsp/variance.h" |
23 | | |
24 | | #include "av1/common/filter.h" |
25 | | #include "av1/common/reconinter.h" |
26 | | |
27 | | #if !CONFIG_REALTIME_ONLY |
// Returns the sum of the squares of the 256 consecutive int16_t values that
// start at a. The total is accumulated in an unsigned int.
uint32_t aom_get_mb_ss_c(const int16_t *a) {
  unsigned int total = 0;
  const int16_t *const end = a + 256;

  for (const int16_t *p = a; p != end; ++p) {
    // The product is formed in int (at most 2^30) before being added.
    total += (*p) * (*p);
  }

  return total;
}
37 | | #endif // !CONFIG_REALTIME_ONLY |
38 | | |
// Walks the w x h blocks at a and b (row pitches a_stride and b_stride) and
// writes two totals: *sum receives the sum of the per-pixel differences
// a - b, and *sse receives the sum of the squared differences.
static void variance(const uint8_t *a, int a_stride, const uint8_t *b,
                     int b_stride, int w, int h, uint32_t *sse, int *sum) {
  int diff_total = 0;
  uint32_t sq_total = 0;

  for (int row = 0; row < h; ++row, a += a_stride, b += b_stride) {
    for (int col = 0; col < w; ++col) {
      const int d = a[col] - b[col];
      diff_total += d;
      sq_total += d * d;
    }
  }

  *sum = diff_total;
  *sse = sq_total;
}
58 | | |
// Returns the sum of squared differences between the w x h blocks at a and b.
// The sum of differences that variance() also produces is discarded.
uint32_t aom_sse_odd_size(const uint8_t *a, int a_stride, const uint8_t *b,
                          int b_stride, int w, int h) {
  int unused_sum;
  uint32_t total_sse;

  variance(a, a_stride, b, b_stride, w, h, &total_sse, &unused_sum);
  return total_sse;
}
66 | | |
67 | | // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal |
68 | | // or vertical direction to produce the filtered output block. Used to implement |
69 | | // the first-pass of 2-D separable filter. |
70 | | // |
71 | | // Produces int16_t output to retain precision for the next pass. Two filter |
72 | | // taps should sum to FILTER_WEIGHT. pixel_step defines whether the filter is |
73 | | // applied horizontally (pixel_step = 1) or vertically (pixel_step = stride). |
74 | | // It defines the offset required to move from one input to the next. |
75 | | static void var_filter_block2d_bil_first_pass_c( |
76 | | const uint8_t *a, uint16_t *b, unsigned int src_pixels_per_line, |
77 | | unsigned int pixel_step, unsigned int output_height, |
78 | 0 | unsigned int output_width, const uint8_t *filter) { |
79 | 0 | unsigned int i, j; |
80 | |
|
81 | 0 | for (i = 0; i < output_height; ++i) { |
82 | 0 | for (j = 0; j < output_width; ++j) { |
83 | 0 | b[j] = ROUND_POWER_OF_TWO( |
84 | 0 | (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS); |
85 | |
|
86 | 0 | ++a; |
87 | 0 | } |
88 | |
|
89 | 0 | a += src_pixels_per_line - output_width; |
90 | 0 | b += output_width; |
91 | 0 | } |
92 | 0 | } |
93 | | |
94 | | // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal |
95 | | // or vertical direction to produce the filtered output block. Used to implement |
96 | | // the second-pass of 2-D separable filter. |
97 | | // |
98 | | // Requires 16-bit input as produced by filter_block2d_bil_first_pass. Two |
99 | | // filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the |
100 | | // filter is applied horizontally (pixel_step = 1) or vertically |
101 | | // (pixel_step = stride). It defines the offset required to move from one input |
102 | | // to the next. Output is 8-bit. |
103 | | static void var_filter_block2d_bil_second_pass_c( |
104 | | const uint16_t *a, uint8_t *b, unsigned int src_pixels_per_line, |
105 | | unsigned int pixel_step, unsigned int output_height, |
106 | 0 | unsigned int output_width, const uint8_t *filter) { |
107 | 0 | unsigned int i, j; |
108 | |
|
109 | 0 | for (i = 0; i < output_height; ++i) { |
110 | 0 | for (j = 0; j < output_width; ++j) { |
111 | 0 | b[j] = ROUND_POWER_OF_TWO( |
112 | 0 | (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS); |
113 | 0 | ++a; |
114 | 0 | } |
115 | |
|
116 | 0 | a += src_pixels_per_line - output_width; |
117 | 0 | b += output_width; |
118 | 0 | } |
119 | 0 | } |
120 | | |
// Defines aom_variance<W>x<H>_c(). The generated function stores the sum of
// squared differences between the W x H blocks at a and b in *sse and returns
// *sse - sum * sum / (W * H), where sum is the sum of the differences.
#define VAR(W, H)                                                    \
  uint32_t aom_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                     const uint8_t *b, int b_stride, \
                                     uint32_t *sse) {                \
    int diff_sum;                                                    \
    variance(a, a_stride, b, b_stride, W, H, sse, &diff_sum);        \
    /* Square in 64 bits, then divide by the pixel count. */         \
    const int64_t sum_sq = (int64_t)diff_sum * diff_sum;             \
    return *sse - (uint32_t)(sum_sq / (W * H));                      \
  }
129 | | |
// Defines aom_sub_pixel_variance<W>x<H>_c(). The generated function filters
// the block at a with the 2-tap bilinear kernels selected by xoffset
// (horizontal pass) and yoffset (vertical pass), then returns
// aom_variance<W>x<H>_c() of the filtered block against b.
#define SUBPIX_VAR(W, H)                                                  \
  uint32_t aom_sub_pixel_variance##W##x##H##_c(                           \
      const uint8_t *a, int a_stride, int xoffset, int yoffset,           \
      const uint8_t *b, int b_stride, uint32_t *sse) {                    \
    /* H + 1 rows: the vertical pass reads the row below each output. */  \
    uint16_t horiz[(H + 1) * W];                                          \
    uint8_t filtered[H * W];                                              \
                                                                          \
    var_filter_block2d_bil_first_pass_c(a, horiz, a_stride, 1, H + 1, W,  \
                                        bilinear_filters_2t[xoffset]);    \
    var_filter_block2d_bil_second_pass_c(horiz, filtered, W, W, H, W,     \
                                         bilinear_filters_2t[yoffset]);   \
                                                                          \
    return aom_variance##W##x##H##_c(filtered, W, b, b_stride, sse);      \
  }
144 | | |
// Defines aom_sub_pixel_avg_variance<W>x<H>_c(). The generated function
// bilinearly filters the block at a (xoffset selects the horizontal kernel,
// yoffset the vertical one), combines the result with second_pred through
// aom_comp_avg_pred(), and returns aom_variance<W>x<H>_c() of that combined
// block against b.
#define SUBPIX_AVG_VAR(W, H)                                              \
  uint32_t aom_sub_pixel_avg_variance##W##x##H##_c(                       \
      const uint8_t *a, int a_stride, int xoffset, int yoffset,           \
      const uint8_t *b, int b_stride, uint32_t *sse,                      \
      const uint8_t *second_pred) {                                       \
    /* H + 1 rows: the vertical pass reads the row below each output. */  \
    uint16_t horiz[(H + 1) * W];                                          \
    uint8_t filtered[H * W];                                              \
    DECLARE_ALIGNED(16, uint8_t, averaged[H * W]);                        \
                                                                          \
    var_filter_block2d_bil_first_pass_c(a, horiz, a_stride, 1, H + 1, W,  \
                                        bilinear_filters_2t[xoffset]);    \
    var_filter_block2d_bil_second_pass_c(horiz, filtered, W, W, H, W,     \
                                         bilinear_filters_2t[yoffset]);   \
                                                                          \
    aom_comp_avg_pred(averaged, second_pred, W, H, filtered, W);          \
                                                                          \
    return aom_variance##W##x##H##_c(averaged, W, b, b_stride, sse);      \
  }
163 | | |
// Processes four horizontally adjacent 8x8 blocks (block k starts 8 * k
// samples to the right of a and b).
//
// For each block k, sse8x8[k] and sum8x8[k] receive the sum of squared
// differences and the sum of differences, and var8x8[k] receives
// sse8x8[k] - sum8x8[k]^2 / 64. The four SSEs and the four sums are also
// added onto *tot_sse and *tot_sum, which are accumulated, not overwritten.
void aom_get_var_sse_sum_8x8_quad_c(const uint8_t *a, int a_stride,
                                    const uint8_t *b, int b_stride,
                                    uint32_t *sse8x8, int *sum8x8,
                                    unsigned int *tot_sse, int *tot_sum,
                                    uint32_t *var8x8) {
  unsigned int sse_acc = 0;
  int sum_acc = 0;

  for (int blk = 0; blk < 4; ++blk) {
    const int x = blk * 8;
    variance(a + x, a_stride, b + x, b_stride, 8, 8, sse8x8 + blk,
             sum8x8 + blk);
  }

  for (int blk = 0; blk < 4; ++blk) {
    sse_acc += sse8x8[blk];
    sum_acc += sum8x8[blk];
  }
  *tot_sse += sse_acc;
  *tot_sum += sum_acc;

  for (int blk = 0; blk < 4; ++blk) {
    const int64_t s = sum8x8[blk];
    // >> 6 divides the squared sum by the 64 pixels of an 8x8 block.
    var8x8[blk] = sse8x8[blk] - (uint32_t)((s * s) >> 6);
  }
}
181 | | |
// Processes two horizontally adjacent 16x16 blocks (block k starts 16 * k
// samples to the right of src_ptr and ref_ptr).
//
// For each block k, sse16x16[k] receives the sum of squared differences and
// var16x16[k] receives sse16x16[k] - sum_k^2 / 256, where sum_k is that
// block's sum of differences. The two SSEs and the two sums are also added
// onto *tot_sse and *tot_sum, which are accumulated, not overwritten.
void aom_get_var_sse_sum_16x16_dual_c(const uint8_t *src_ptr, int source_stride,
                                      const uint8_t *ref_ptr, int ref_stride,
                                      uint32_t *sse16x16, unsigned int *tot_sse,
                                      int *tot_sum, uint32_t *var16x16) {
  int block_sum[2] = { 0 };

  for (int blk = 0; blk < 2; ++blk) {
    const int x = blk * 16;
    variance(src_ptr + x, source_stride, ref_ptr + x, ref_stride, 16, 16,
             sse16x16 + blk, block_sum + blk);
  }

  *tot_sse += sse16x16[0] + sse16x16[1];
  *tot_sum += block_sum[0] + block_sum[1];

  for (int blk = 0; blk < 2; ++blk) {
    const int64_t s = block_sum[blk];
    // >> 8 divides the squared sum by the 256 pixels of a 16x16 block.
    var16x16[blk] = sse16x16[blk] - (uint32_t)((s * s) >> 8);
  }
}
200 | | |
/* Defines aom_mse<W>x<H>_c(). Same as the variance functions except that the
 * sum^2 / (W * H) term is not subtracted: the sum of squared differences is
 * written to *sse and also returned.
 */
#define MSE(W, H)                                               \
  uint32_t aom_mse##W##x##H##_c(const uint8_t *a, int a_stride, \
                                const uint8_t *b, int b_stride, \
                                uint32_t *sse) {                \
    int unused_sum;                                             \
    variance(a, a_stride, b, b_stride, W, H, sse, &unused_sum); \
    return *sse;                                                \
  }
213 | | |
/* Expands to all three variance flavours for one W x H block size: plain,
 * sub-pixel, and sub-pixel with averaging against a second predictor.
 */
#define VARIANCES(W, H) VAR(W, H) SUBPIX_VAR(W, H) SUBPIX_AVG_VAR(W, H)
219 | | |
// Instantiate the plain, sub-pixel and sub-pixel-average variance functions
// for each block size that is always built.
VARIANCES(128, 128)
VARIANCES(128, 64)
VARIANCES(64, 128)
VARIANCES(64, 64)
VARIANCES(64, 32)
VARIANCES(32, 64)
VARIANCES(32, 32)
VARIANCES(32, 16)
VARIANCES(16, 32)
VARIANCES(16, 16)
VARIANCES(16, 8)
VARIANCES(8, 16)
VARIANCES(8, 8)
VARIANCES(8, 4)
VARIANCES(4, 8)
VARIANCES(4, 4)

// Realtime mode doesn't use rectangular blocks.
// The 1:4 and 4:1 sizes below are compiled out of realtime-only builds.
#if !CONFIG_REALTIME_ONLY
VARIANCES(4, 16)
VARIANCES(16, 4)
VARIANCES(8, 32)
VARIANCES(32, 8)
VARIANCES(16, 64)
VARIANCES(64, 16)
#endif

// MSE functions are instantiated for these four block sizes only.
MSE(16, 16)
MSE(16, 8)
MSE(8, 16)
MSE(8, 8)
251 | | |
// Writes to comp_pred the per-pixel combination of pred and ref computed as
// ROUND_POWER_OF_TWO(pred + ref, 1) over a width x height block. comp_pred
// and pred are packed with a row pitch of width; ref uses ref_stride.
void aom_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
                         int height, const uint8_t *ref, int ref_stride) {
  for (int row = 0; row < height; ++row) {
    for (int col = 0; col < width; ++col) {
      const int pair_sum = pred[col] + ref[col];
      comp_pred[col] = ROUND_POWER_OF_TWO(pair_sum, 1);
    }
    ref += ref_stride;
    pred += width;
    comp_pred += width;
  }
}
266 | | |
267 | | #if CONFIG_AV1_HIGHBITDEPTH |
// Computes, over a w x h block of 16-bit samples, the sum of differences
// (a - b) into *sum and the sum of squared differences into *sse.
//
// a8 and b8 are uint8_t-typed handles that CONVERT_TO_SHORTPTR turns into the
// underlying uint16_t sample pointers; a_stride and b_stride are applied to
// those uint16_t pointers.
//
// All arithmetic is done in 64 bits. Squaring the difference in int would
// overflow (undefined behavior) once |a - b| exceeds 46340, which 16-bit
// samples permit, and a 32-bit per-row sum could overflow for very wide rows.
// Results are unchanged for every input the int-based arithmetic handled.
static void highbd_variance64(const uint8_t *a8, int a_stride,
                              const uint8_t *b8, int b_stride, int w, int h,
                              uint64_t *sse, int64_t *sum) {
  const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
  const uint16_t *b = CONVERT_TO_SHORTPTR(b8);
  int64_t tsum = 0;
  uint64_t tsse = 0;
  for (int i = 0; i < h; ++i) {
    for (int j = 0; j < w; ++j) {
      const int64_t diff = (int64_t)a[j] - (int64_t)b[j];
      tsum += diff;
      tsse += (uint64_t)(diff * diff);
    }
    a += a_stride;
    b += b_stride;
  }
  *sum = tsum;
  *sse = tsse;
}
289 | | |
// Returns the 64-bit sum of squared differences between the w x h
// high-bitdepth blocks at a and b. The sum of differences that
// highbd_variance64() also produces is discarded.
uint64_t aom_highbd_sse_odd_size(const uint8_t *a, int a_stride,
                                 const uint8_t *b, int b_stride, int w, int h) {
  int64_t unused_sum;
  uint64_t total_sse;

  highbd_variance64(a, a_stride, b, b_stride, w, h, &total_sse, &unused_sum);
  return total_sse;
}
297 | | |
// Runs highbd_variance64() on the w x h block and narrows its 64-bit results
// without any scaling: *sse gets the SSE cast to uint32_t and *sum gets the
// sum of differences cast to int.
static void highbd_8_variance(const uint8_t *a8, int a_stride,
                              const uint8_t *b8, int b_stride, int w, int h,
                              uint32_t *sse, int *sum) {
  int64_t wide_sum = 0;
  uint64_t wide_sse = 0;

  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &wide_sse, &wide_sum);
  *sum = (int)wide_sum;
  *sse = (uint32_t)wide_sse;
}
307 | | |
// Runs highbd_variance64() on the w x h block and scales the results down
// with ROUND_POWER_OF_TWO before narrowing: the SSE by 4 bits into *sse and
// the sum of differences by 2 bits into *sum.
// NOTE(review): presumably this brings 10-bit statistics to 8-bit scale;
// confirm against callers.
static void highbd_10_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride, int w, int h,
                               uint32_t *sse, int *sum) {
  int64_t wide_sum = 0;
  uint64_t wide_sse = 0;

  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &wide_sse, &wide_sum);
  *sum = (int)ROUND_POWER_OF_TWO(wide_sum, 2);
  *sse = (uint32_t)ROUND_POWER_OF_TWO(wide_sse, 4);
}
317 | | |
// Runs highbd_variance64() on the w x h block and scales the results down
// with ROUND_POWER_OF_TWO before narrowing: the SSE by 8 bits into *sse and
// the sum of differences by 4 bits into *sum.
// NOTE(review): presumably this brings 12-bit statistics to 8-bit scale;
// confirm against callers.
static void highbd_12_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride, int w, int h,
                               uint32_t *sse, int *sum) {
  int64_t wide_sum = 0;
  uint64_t wide_sse = 0;

  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &wide_sse, &wide_sum);
  *sum = (int)ROUND_POWER_OF_TWO(wide_sum, 4);
  *sse = (uint32_t)ROUND_POWER_OF_TWO(wide_sse, 8);
}
327 | | |
// Defines aom_highbd_{8,10,12}_variance<W>x<H>_c(). Each generated function
// stores the (bit-depth specific) SSE of the W x H blocks in *sse and returns
// *sse - sum * sum / (W * H).
//
// The 8-bit variant does the subtraction in uint32_t. The 10- and 12-bit
// variants do it in int64_t and return 0 when the result is negative.
#define HIGHBD_VAR(W, H)                                                       \
  uint32_t aom_highbd_8_variance##W##x##H##_c(const uint8_t *a, int a_stride,  \
                                              const uint8_t *b, int b_stride,  \
                                              uint32_t *sse) {                 \
    int diff_sum;                                                              \
    highbd_8_variance(a, a_stride, b, b_stride, W, H, sse, &diff_sum);         \
    const int64_t sum_sq = (int64_t)diff_sum * diff_sum;                       \
    return *sse - (uint32_t)(sum_sq / (W * H));                                \
  }                                                                            \
                                                                               \
  uint32_t aom_highbd_10_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                               const uint8_t *b, int b_stride, \
                                               uint32_t *sse) {                \
    int diff_sum;                                                              \
    highbd_10_variance(a, a_stride, b, b_stride, W, H, sse, &diff_sum);        \
    const int64_t sum_sq = (int64_t)diff_sum * diff_sum;                       \
    const int64_t var = (int64_t)(*sse) - (sum_sq / (W * H));                  \
    if (var < 0) return 0;                                                     \
    return (uint32_t)var;                                                      \
  }                                                                            \
                                                                               \
  uint32_t aom_highbd_12_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                               const uint8_t *b, int b_stride, \
                                               uint32_t *sse) {                \
    int diff_sum;                                                              \
    highbd_12_variance(a, a_stride, b, b_stride, W, H, sse, &diff_sum);        \
    const int64_t sum_sq = (int64_t)diff_sum * diff_sum;                       \
    const int64_t var = (int64_t)(*sse) - (sum_sq / (W * H));                  \
    if (var < 0) return 0;                                                     \
    return (uint32_t)var;                                                      \
  }
356 | | |
// Defines aom_highbd_{8,10,12}_mse<W>x<H>_c(). Each generated function calls
// the matching highbd_<bits>_variance() helper, which writes the SSE of the
// W x H blocks to *sse, and returns that value. The sum of differences is
// discarded.
#define HIGHBD_MSE(W, H)                                                      \
  uint32_t aom_highbd_8_mse##W##x##H##_c(const uint8_t *src, int src_stride,  \
                                         const uint8_t *ref, int ref_stride,  \
                                         uint32_t *sse) {                     \
    int unused_sum;                                                           \
    highbd_8_variance(src, src_stride, ref, ref_stride, W, H, sse,            \
                      &unused_sum);                                           \
    return *sse;                                                              \
  }                                                                           \
                                                                              \
  uint32_t aom_highbd_10_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
                                          const uint8_t *ref, int ref_stride, \
                                          uint32_t *sse) {                    \
    int unused_sum;                                                           \
    highbd_10_variance(src, src_stride, ref, ref_stride, W, H, sse,           \
                       &unused_sum);                                          \
    return *sse;                                                              \
  }                                                                           \
                                                                              \
  uint32_t aom_highbd_12_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
                                          const uint8_t *ref, int ref_stride, \
                                          uint32_t *sse) {                    \
    int unused_sum;                                                           \
    highbd_12_variance(src, src_stride, ref, ref_stride, W, H, sse,           \
                       &unused_sum);                                          \
    return *sse;                                                              \
  }
381 | | |
382 | | void aom_highbd_var_filter_block2d_bil_first_pass( |
383 | | const uint8_t *src_ptr8, uint16_t *output_ptr, |
384 | | unsigned int src_pixels_per_line, int pixel_step, |
385 | | unsigned int output_height, unsigned int output_width, |
386 | 0 | const uint8_t *filter) { |
387 | 0 | unsigned int i, j; |
388 | 0 | uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8); |
389 | 0 | for (i = 0; i < output_height; ++i) { |
390 | 0 | for (j = 0; j < output_width; ++j) { |
391 | 0 | output_ptr[j] = ROUND_POWER_OF_TWO( |
392 | 0 | (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1], |
393 | 0 | FILTER_BITS); |
394 | |
|
395 | 0 | ++src_ptr; |
396 | 0 | } |
397 | | |
398 | | // Next row... |
399 | 0 | src_ptr += src_pixels_per_line - output_width; |
400 | 0 | output_ptr += output_width; |
401 | 0 | } |
402 | 0 | } |
403 | | |
404 | | void aom_highbd_var_filter_block2d_bil_second_pass( |
405 | | const uint16_t *src_ptr, uint16_t *output_ptr, |
406 | | unsigned int src_pixels_per_line, unsigned int pixel_step, |
407 | | unsigned int output_height, unsigned int output_width, |
408 | 0 | const uint8_t *filter) { |
409 | 0 | unsigned int i, j; |
410 | |
|
411 | 0 | for (i = 0; i < output_height; ++i) { |
412 | 0 | for (j = 0; j < output_width; ++j) { |
413 | 0 | output_ptr[j] = ROUND_POWER_OF_TWO( |
414 | 0 | (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1], |
415 | 0 | FILTER_BITS); |
416 | 0 | ++src_ptr; |
417 | 0 | } |
418 | |
|
419 | 0 | src_ptr += src_pixels_per_line - output_width; |
420 | 0 | output_ptr += output_width; |
421 | 0 | } |
422 | 0 | } |
423 | | |
// Defines aom_highbd_{8,10,12}_sub_pixel_variance<W>x<H>_c(). Each generated
// function bilinearly filters the block at src (xoffset selects the
// horizontal kernel, yoffset the vertical one) and returns the matching
// aom_highbd_<bits>_variance<W>x<H>_c() of the filtered block against dst.
#define HIGHBD_SUBPIX_VAR(W, H)                                              \
  uint32_t aom_highbd_8_sub_pixel_variance##W##x##H##_c(                     \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse) {                   \
    /* H + 1 rows: the vertical pass reads the row below each output. */     \
    uint16_t horiz[(H + 1) * W];                                             \
    uint16_t filtered[H * W];                                                \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, horiz, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);  \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        horiz, filtered, W, W, H, W, bilinear_filters_2t[yoffset]);          \
                                                                             \
    return aom_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(filtered),  \
                                              W, dst, dst_stride, sse);      \
  }                                                                          \
                                                                             \
  uint32_t aom_highbd_10_sub_pixel_variance##W##x##H##_c(                    \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse) {                   \
    /* H + 1 rows: the vertical pass reads the row below each output. */     \
    uint16_t horiz[(H + 1) * W];                                             \
    uint16_t filtered[H * W];                                                \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, horiz, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);  \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        horiz, filtered, W, W, H, W, bilinear_filters_2t[yoffset]);          \
                                                                             \
    return aom_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(filtered), \
                                               W, dst, dst_stride, sse);     \
  }                                                                          \
                                                                             \
  uint32_t aom_highbd_12_sub_pixel_variance##W##x##H##_c(                    \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse) {                   \
    /* H + 1 rows: the vertical pass reads the row below each output. */     \
    uint16_t horiz[(H + 1) * W];                                             \
    uint16_t filtered[H * W];                                                \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, horiz, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);  \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        horiz, filtered, W, W, H, W, bilinear_filters_2t[yoffset]);          \
                                                                             \
    return aom_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(filtered), \
                                               W, dst, dst_stride, sse);     \
  }
469 | | |
/* Emits aom_highbd_<BD>_sub_pixel_avg_variance<W>x<H>_c for one bit depth BD.
 *
 * The generated function:
 *   1. runs aom_highbd_var_filter_block2d_bil_first_pass over H + 1 rows of
 *      src with the bilinear_filters_2t entry selected by xoffset,
 *   2. runs aom_highbd_var_filter_block2d_bil_second_pass over that result
 *      with the entry selected by yoffset, producing a W x H block,
 *   3. combines that block with second_pred through
 *      aom_highbd_comp_avg_pred_c, and
 *   4. returns aom_highbd_<BD>_variance<W>x<H>_c of the combined block
 *      against dst, forwarding sse to that call.
 *
 * xoffset and yoffset index bilinear_filters_2t directly and are not range
 * checked here.  The three scratch buffers live on the stack.
 */
#define HIGHBD_SUBPIX_AVG_VAR_BD(BD, W, H) \
  uint32_t aom_highbd_##BD##_sub_pixel_avg_variance##W##x##H##_c( \
      const uint8_t *src, int src_stride, int xoffset, int yoffset, \
      const uint8_t *dst, int dst_stride, uint32_t *sse, \
      const uint8_t *second_pred) { \
    uint16_t fdata3[(H + 1) * W]; \
    uint16_t temp2[H * W]; \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
    \
    aom_highbd_var_filter_block2d_bil_first_pass( \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass( \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \
    \
    aom_highbd_comp_avg_pred_c(CONVERT_TO_BYTEPTR(temp3), second_pred, W, H, \
                               CONVERT_TO_BYTEPTR(temp2), W); \
    \
    return aom_highbd_##BD##_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), \
                                                   W, dst, dst_stride, sse); \
  }

/* Defines the 8-, 10- and 12-bit sub-pixel average variance functions for a
 * W x H block.  The definition deliberately ends without a trailing line
 * continuation so that whatever follows it is never absorbed into the macro.
 */
#define HIGHBD_SUBPIX_AVG_VAR(W, H) \
  HIGHBD_SUBPIX_AVG_VAR_BD(8, W, H) \
  HIGHBD_SUBPIX_AVG_VAR_BD(10, W, H) \
  HIGHBD_SUBPIX_AVG_VAR_BD(12, W, H)

/* All three forms of the variance are available in the same sizes. */
/* HIGHBD_VARIANCES(W, H) expands, in order, to HIGHBD_VAR, HIGHBD_SUBPIX_VAR
 * and HIGHBD_SUBPIX_AVG_VAR for the same W x H block size. */
#define HIGHBD_VARIANCES(W, H) \
  HIGHBD_VAR(W, H) \
  HIGHBD_SUBPIX_VAR(W, H) \
  HIGHBD_SUBPIX_AVG_VAR(W, H)
536 | | |
/* Instantiate the high bit-depth variance families (via HIGHBD_VARIANCES) for
 * every W x H block size listed below. */
HIGHBD_VARIANCES(128, 128)
HIGHBD_VARIANCES(128, 64)
HIGHBD_VARIANCES(64, 128)
HIGHBD_VARIANCES(64, 64)
HIGHBD_VARIANCES(64, 32)
HIGHBD_VARIANCES(32, 64)
HIGHBD_VARIANCES(32, 32)
HIGHBD_VARIANCES(32, 16)
HIGHBD_VARIANCES(16, 32)
HIGHBD_VARIANCES(16, 16)
HIGHBD_VARIANCES(16, 8)
HIGHBD_VARIANCES(8, 16)
HIGHBD_VARIANCES(8, 8)
HIGHBD_VARIANCES(8, 4)
HIGHBD_VARIANCES(4, 8)
HIGHBD_VARIANCES(4, 4)

// Realtime mode doesn't use 4x rectangular blocks (the 1:4 and 4:1 sizes
// below), so they are only built when CONFIG_REALTIME_ONLY is off.
#if !CONFIG_REALTIME_ONLY
HIGHBD_VARIANCES(4, 16)
HIGHBD_VARIANCES(16, 4)
HIGHBD_VARIANCES(8, 32)
HIGHBD_VARIANCES(32, 8)
HIGHBD_VARIANCES(16, 64)
HIGHBD_VARIANCES(64, 16)
#endif

/* HIGHBD_MSE instantiations; the macro itself is defined outside this part of
 * the file.  Only these four block sizes are generated. */
HIGHBD_MSE(16, 16)
HIGHBD_MSE(16, 8)
HIGHBD_MSE(8, 16)
HIGHBD_MSE(8, 8)
568 | | |
// Writes the rounded average of two high bit-depth predictions to comp_pred8.
//
// All three uint8_t pointers are passed through CONVERT_TO_SHORTPTR and then
// accessed as uint16_t samples.  For each of the `height` rows, `width`
// samples of pred8 and ref8 are added and halved with
// ROUND_POWER_OF_TWO(.., 1).  comp_pred8 and pred8 advance by `width` samples
// per row; ref8 advances by `ref_stride` samples.
void aom_highbd_comp_avg_pred_c(uint8_t *comp_pred8, const uint8_t *pred8,
                                int width, int height, const uint8_t *ref8,
                                int ref_stride) {
  const uint16_t *pred_row = CONVERT_TO_SHORTPTR(pred8);
  const uint16_t *ref_row = CONVERT_TO_SHORTPTR(ref8);
  uint16_t *out_row = CONVERT_TO_SHORTPTR(comp_pred8);

  for (int row = 0; row < height; ++row) {
    for (int col = 0; col < width; ++col) {
      const int pair_sum = pred_row[col] + ref_row[col];
      out_row[col] = ROUND_POWER_OF_TWO(pair_sum, 1);
    }
    out_row += width;
    pred_row += width;
    ref_row += ref_stride;
  }
}
586 | | #endif // CONFIG_AV1_HIGHBITDEPTH |
587 | | |
// Blends `pred` and `ref` under `mask` into comp_pred, one sample at a time,
// using AOM_BLEND_A64(mask, first, second).
//
// When invert_mask is zero, `ref` is the first blend operand and `pred` the
// second; when it is non-zero the two are swapped.  comp_pred and pred are
// laid out with a row pitch of `width`, ref with `ref_stride`, and mask with
// `mask_stride`.
void aom_comp_mask_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
                          int height, const uint8_t *ref, int ref_stride,
                          const uint8_t *mask, int mask_stride,
                          int invert_mask) {
  const uint8_t *first = ref;
  const uint8_t *second = pred;
  int first_stride = ref_stride;
  int second_stride = width;
  if (invert_mask) {
    first = pred;
    second = ref;
    first_stride = width;
    second_stride = ref_stride;
  }

  for (int row = 0; row < height; ++row) {
    for (int col = 0; col < width; ++col) {
      comp_pred[col] = AOM_BLEND_A64(mask[col], first[col], second[col]);
    }
    comp_pred += width;
    first += first_stride;
    second += second_stride;
    mask += mask_stride;
  }
}
607 | | |
/* Emits aom_masked_sub_pixel_variance<W>x<H>_c.
 *
 * The generated function filters src with the two bilinear passes (filter
 * taps chosen by xoffset, then yoffset), blends the filtered W x H block with
 * second_pred through aom_comp_mask_pred_c using msk / msk_stride /
 * invert_mask, and returns aom_variance<W>x<H>_c of the blended block against
 * ref, forwarding sse to that call.
 */
#define MASK_SUBPIX_VAR(W, H) \
  unsigned int aom_masked_sub_pixel_variance##W##x##H##_c( \
      const uint8_t *src, int src_stride, int xoffset, int yoffset, \
      const uint8_t *ref, int ref_stride, const uint8_t *second_pred, \
      const uint8_t *msk, int msk_stride, int invert_mask, \
      unsigned int *sse) { \
    /* First-pass output: H + 1 rows of W samples. */ \
    uint16_t pass1[(H + 1) * W]; \
    /* Second-pass output: the W x H filtered block. */ \
    uint8_t filtered[H * W]; \
    /* Masked blend of `filtered` and second_pred. */ \
    DECLARE_ALIGNED(16, uint8_t, blended[H * W]); \
    \
    var_filter_block2d_bil_first_pass_c(src, pass1, src_stride, 1, H + 1, W, \
                                        bilinear_filters_2t[xoffset]); \
    var_filter_block2d_bil_second_pass_c(pass1, filtered, W, W, H, W, \
                                         bilinear_filters_2t[yoffset]); \
    \
    aom_comp_mask_pred_c(blended, second_pred, W, H, filtered, W, msk, \
                         msk_stride, invert_mask); \
    return aom_variance##W##x##H##_c(blended, W, ref, ref_stride, sse); \
  }
627 | | |
/* Instantiate aom_masked_sub_pixel_variance<W>x<H>_c for each block size. */
MASK_SUBPIX_VAR(4, 4)
MASK_SUBPIX_VAR(4, 8)
MASK_SUBPIX_VAR(8, 4)
MASK_SUBPIX_VAR(8, 8)
MASK_SUBPIX_VAR(8, 16)
MASK_SUBPIX_VAR(16, 8)
MASK_SUBPIX_VAR(16, 16)
MASK_SUBPIX_VAR(16, 32)
MASK_SUBPIX_VAR(32, 16)
MASK_SUBPIX_VAR(32, 32)
MASK_SUBPIX_VAR(32, 64)
MASK_SUBPIX_VAR(64, 32)
MASK_SUBPIX_VAR(64, 64)
MASK_SUBPIX_VAR(64, 128)
MASK_SUBPIX_VAR(128, 64)
MASK_SUBPIX_VAR(128, 128)

// Realtime mode doesn't use 4x rectangular blocks (the 1:4 and 4:1 sizes
// below), so they are only built when CONFIG_REALTIME_ONLY is off.
#if !CONFIG_REALTIME_ONLY
MASK_SUBPIX_VAR(4, 16)
MASK_SUBPIX_VAR(16, 4)
MASK_SUBPIX_VAR(8, 32)
MASK_SUBPIX_VAR(32, 8)
MASK_SUBPIX_VAR(16, 64)
MASK_SUBPIX_VAR(64, 16)
#endif
654 | | |
655 | | #if CONFIG_AV1_HIGHBITDEPTH |
// High bit-depth counterpart of aom_comp_mask_pred_c: blends pred8 and ref8
// under `mask` into comp_pred8 using AOM_BLEND_A64(mask, first, second).
//
// The three uint8_t sample pointers are passed through CONVERT_TO_SHORTPTR
// and accessed as uint16_t samples; `mask` is read as plain uint8_t.  When
// invert_mask is zero, ref is the first blend operand and pred the second;
// otherwise they are swapped.  comp_pred8 and pred8 use a row pitch of
// `width` samples, ref8 uses `ref_stride`, and mask uses `mask_stride`.
void aom_highbd_comp_mask_pred_c(uint8_t *comp_pred8, const uint8_t *pred8,
                                 int width, int height, const uint8_t *ref8,
                                 int ref_stride, const uint8_t *mask,
                                 int mask_stride, int invert_mask) {
  int i, j;
  // The inputs are only read, so keep them const after the conversion.
  const uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  const uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  uint16_t *comp_pred = CONVERT_TO_SHORTPTR(comp_pred8);

  // invert_mask does not change inside the loops: pick the operand order and
  // the matching row pitches once, as the 8-bit version does.
  const uint16_t *src0 = invert_mask ? pred : ref;
  const uint16_t *src1 = invert_mask ? ref : pred;
  const int stride0 = invert_mask ? width : ref_stride;
  const int stride1 = invert_mask ? ref_stride : width;

  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      comp_pred[j] = AOM_BLEND_A64(mask[j], src0[j], src1[j]);
    }
    comp_pred += width;
    src0 += stride0;
    src1 += stride1;
    mask += mask_stride;
  }
}
677 | | |
/* Emits aom_highbd_<BD>_masked_sub_pixel_variance<W>x<H>_c for one bit depth.
 *
 * The generated function filters src with the two high bit-depth bilinear
 * passes (filter taps chosen by xoffset, then yoffset), blends the filtered
 * W x H block with second_pred through aom_highbd_comp_mask_pred_c using
 * msk / msk_stride / invert_mask, and returns
 * aom_highbd_<BD>_variance<W>x<H>_c of the blended block against ref,
 * forwarding sse to that call.
 */
#define HIGHBD_MASK_SUBPIX_VAR_BD(BD, W, H) \
  unsigned int aom_highbd_##BD##_masked_sub_pixel_variance##W##x##H##_c( \
      const uint8_t *src, int src_stride, int xoffset, int yoffset, \
      const uint8_t *ref, int ref_stride, const uint8_t *second_pred, \
      const uint8_t *msk, int msk_stride, int invert_mask, \
      unsigned int *sse) { \
    /* First-pass output: H + 1 rows of W samples. */ \
    uint16_t pass1[(H + 1) * W]; \
    /* Second-pass output: the W x H filtered block. */ \
    uint16_t filtered[H * W]; \
    /* Masked blend of `filtered` and second_pred. */ \
    DECLARE_ALIGNED(16, uint16_t, blended[H * W]); \
    \
    aom_highbd_var_filter_block2d_bil_first_pass( \
        src, pass1, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass( \
        pass1, filtered, W, W, H, W, bilinear_filters_2t[yoffset]); \
    \
    aom_highbd_comp_mask_pred_c(CONVERT_TO_BYTEPTR(blended), second_pred, W, \
                                H, CONVERT_TO_BYTEPTR(filtered), W, msk, \
                                msk_stride, invert_mask); \
    \
    return aom_highbd_##BD##_variance##W##x##H##_c( \
        CONVERT_TO_BYTEPTR(blended), W, ref, ref_stride, sse); \
  }

/* Defines the 8-, 10- and 12-bit masked sub-pixel variance functions for a
 * W x H block. */
#define HIGHBD_MASK_SUBPIX_VAR(W, H) \
  HIGHBD_MASK_SUBPIX_VAR_BD(8, W, H) \
  HIGHBD_MASK_SUBPIX_VAR_BD(10, W, H) \
  HIGHBD_MASK_SUBPIX_VAR_BD(12, W, H)
744 | | |
/* Instantiate the 8/10/12-bit masked sub-pixel variance functions for each
 * block size. */
HIGHBD_MASK_SUBPIX_VAR(4, 4)
HIGHBD_MASK_SUBPIX_VAR(4, 8)
HIGHBD_MASK_SUBPIX_VAR(8, 4)
HIGHBD_MASK_SUBPIX_VAR(8, 8)
HIGHBD_MASK_SUBPIX_VAR(8, 16)
HIGHBD_MASK_SUBPIX_VAR(16, 8)
HIGHBD_MASK_SUBPIX_VAR(16, 16)
HIGHBD_MASK_SUBPIX_VAR(16, 32)
HIGHBD_MASK_SUBPIX_VAR(32, 16)
HIGHBD_MASK_SUBPIX_VAR(32, 32)
HIGHBD_MASK_SUBPIX_VAR(32, 64)
HIGHBD_MASK_SUBPIX_VAR(64, 32)
HIGHBD_MASK_SUBPIX_VAR(64, 64)
HIGHBD_MASK_SUBPIX_VAR(64, 128)
HIGHBD_MASK_SUBPIX_VAR(128, 64)
HIGHBD_MASK_SUBPIX_VAR(128, 128)
// Realtime mode doesn't use 4x rectangular blocks.
#if !CONFIG_REALTIME_ONLY
HIGHBD_MASK_SUBPIX_VAR(4, 16)
HIGHBD_MASK_SUBPIX_VAR(16, 4)
HIGHBD_MASK_SUBPIX_VAR(8, 32)
HIGHBD_MASK_SUBPIX_VAR(32, 8)
HIGHBD_MASK_SUBPIX_VAR(16, 64)
HIGHBD_MASK_SUBPIX_VAR(64, 16)
#endif
769 | | #endif // CONFIG_AV1_HIGHBITDEPTH |
770 | | |
771 | | #if !CONFIG_REALTIME_ONLY |
// Accumulates the sum and the sum of squares of the per-sample OBMC
// differences over a w x h block.
//
// Each difference is ROUND_POWER_OF_TWO_SIGNED(wsrc - pre * mask, 12).
// `pre` advances by pre_stride per row, while `wsrc` and `mask` are packed
// with a row pitch of w.  The totals are written to *sse and *sum.
static inline void obmc_variance(const uint8_t *pre, int pre_stride,
                                 const int32_t *wsrc, const int32_t *mask,
                                 int w, int h, unsigned int *sse, int *sum) {
  unsigned int sq_total = 0;
  int diff_total = 0;

  for (int row = 0; row < h; ++row) {
    for (int col = 0; col < w; ++col) {
      const int diff =
          ROUND_POWER_OF_TWO_SIGNED(wsrc[col] - pre[col] * mask[col], 12);
      diff_total += diff;
      sq_total += diff * diff;
    }

    pre += pre_stride;
    wsrc += w;
    mask += w;
  }

  *sse = sq_total;
  *sum = diff_total;
}
793 | | |
/* Emits aom_obmc_variance<W>x<H>_c: calls obmc_variance for a W x H block,
 * which fills *sse and a local sum, then returns
 * *sse - sum * sum / (W * H), with the squared sum formed in 64 bits. */
#define OBMC_VAR(W, H) \
  unsigned int aom_obmc_variance##W##x##H##_c( \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc, \
      const int32_t *mask, unsigned int *sse) { \
    int sum; \
    obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum); \
    const int64_t sum_sq = (int64_t)sum * sum; \
    const unsigned int mean_term = (unsigned int)(sum_sq / (W * H)); \
    return *sse - mean_term; \
  }
802 | | |
/* Emits aom_obmc_sub_pixel_variance<W>x<H>_c: filters `pre` with the two
 * bilinear passes (filter taps chosen by xoffset, then yoffset) into a packed
 * W x H block, and returns aom_obmc_variance<W>x<H>_c of that block against
 * wsrc / mask, forwarding sse to that call. */
#define OBMC_SUBPIX_VAR(W, H) \
  unsigned int aom_obmc_sub_pixel_variance##W##x##H##_c( \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset, \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) { \
    /* First-pass output: H + 1 rows of W samples. */ \
    uint16_t pass1[(H + 1) * W]; \
    /* Second-pass output: the W x H filtered block. */ \
    uint8_t filtered[H * W]; \
    \
    var_filter_block2d_bil_first_pass_c(pre, pass1, pre_stride, 1, H + 1, W, \
                                        bilinear_filters_2t[xoffset]); \
    var_filter_block2d_bil_second_pass_c(pass1, filtered, W, W, H, W, \
                                         bilinear_filters_2t[yoffset]); \
    \
    return aom_obmc_variance##W##x##H##_c(filtered, W, wsrc, mask, sse); \
  }
817 | | |
/* Instantiate the OBMC variance and OBMC sub-pixel variance functions for
 * each block size.  OBMC_VAR precedes OBMC_SUBPIX_VAR for every size because
 * the sub-pixel function calls the plain one of the same size. */
OBMC_VAR(4, 4)
OBMC_SUBPIX_VAR(4, 4)

OBMC_VAR(4, 8)
OBMC_SUBPIX_VAR(4, 8)

OBMC_VAR(8, 4)
OBMC_SUBPIX_VAR(8, 4)

OBMC_VAR(8, 8)
OBMC_SUBPIX_VAR(8, 8)

OBMC_VAR(8, 16)
OBMC_SUBPIX_VAR(8, 16)

OBMC_VAR(16, 8)
OBMC_SUBPIX_VAR(16, 8)

OBMC_VAR(16, 16)
OBMC_SUBPIX_VAR(16, 16)

OBMC_VAR(16, 32)
OBMC_SUBPIX_VAR(16, 32)

OBMC_VAR(32, 16)
OBMC_SUBPIX_VAR(32, 16)

OBMC_VAR(32, 32)
OBMC_SUBPIX_VAR(32, 32)

OBMC_VAR(32, 64)
OBMC_SUBPIX_VAR(32, 64)

OBMC_VAR(64, 32)
OBMC_SUBPIX_VAR(64, 32)

OBMC_VAR(64, 64)
OBMC_SUBPIX_VAR(64, 64)

OBMC_VAR(64, 128)
OBMC_SUBPIX_VAR(64, 128)

OBMC_VAR(128, 64)
OBMC_SUBPIX_VAR(128, 64)

OBMC_VAR(128, 128)
OBMC_SUBPIX_VAR(128, 128)

/* The 1:4 and 4:1 sizes.  No extra CONFIG_REALTIME_ONLY guard appears here;
 * this section already sits inside the #if !CONFIG_REALTIME_ONLY opened
 * before obmc_variance. */
OBMC_VAR(4, 16)
OBMC_SUBPIX_VAR(4, 16)
OBMC_VAR(16, 4)
OBMC_SUBPIX_VAR(16, 4)
OBMC_VAR(8, 32)
OBMC_SUBPIX_VAR(8, 32)
OBMC_VAR(32, 8)
OBMC_SUBPIX_VAR(32, 8)
OBMC_VAR(16, 64)
OBMC_SUBPIX_VAR(16, 64)
OBMC_VAR(64, 16)
OBMC_SUBPIX_VAR(64, 16)
878 | | |
879 | | #if CONFIG_AV1_HIGHBITDEPTH |
// High bit-depth OBMC accumulation with 64-bit totals.
//
// pre8 is passed through CONVERT_TO_SHORTPTR and read as uint16_t samples.
// For every sample of the w x h block the difference
// ROUND_POWER_OF_TWO_SIGNED(wsrc - pre * mask, 12) is formed; the differences
// are summed into *sum and their squares into *sse.  `pre` advances by
// pre_stride per row, while `wsrc` and `mask` are packed with a row pitch
// of w.
static inline void highbd_obmc_variance64(const uint8_t *pre8, int pre_stride,
                                          const int32_t *wsrc,
                                          const int32_t *mask, int w, int h,
                                          uint64_t *sse, int64_t *sum) {
  int i, j;
  // The prediction is only read, so keep it const after the conversion.
  const uint16_t *pre = CONVERT_TO_SHORTPTR(pre8);
  uint64_t tsse = 0;
  int64_t tsum = 0;

  for (i = 0; i < h; i++) {
    for (j = 0; j < w; j++) {
      const int diff =
          ROUND_POWER_OF_TWO_SIGNED(wsrc[j] - pre[j] * mask[j], 12);
      tsum += diff;
      // Square in 64 bits: the accumulator is 64-bit anyway, and an int
      // product would overflow (undefined behaviour) once |diff| > 46340.
      tsse += (uint64_t)((int64_t)diff * diff);
    }

    pre += pre_stride;
    wsrc += w;
    mask += w;
  }
  *sse = tsse;
  *sum = tsum;
}
903 | | |
// Narrowing wrapper around highbd_obmc_variance64: the 64-bit totals are
// stored to *sum and *sse with plain casts to int and unsigned int, with no
// rounding or scaling.
static inline void highbd_obmc_variance(const uint8_t *pre8, int pre_stride,
                                        const int32_t *wsrc,
                                        const int32_t *mask, int w, int h,
                                        unsigned int *sse, int *sum) {
  uint64_t wide_sse;
  int64_t wide_sum;

  highbd_obmc_variance64(pre8, pre_stride, wsrc, mask, w, h, &wide_sse,
                         &wide_sum);
  *sum = (int)wide_sum;
  *sse = (unsigned int)wide_sse;
}
914 | | |
// 10-bit wrapper around highbd_obmc_variance64: the 64-bit sum is reduced
// with ROUND_POWER_OF_TWO(.., 2) and the 64-bit sse with
// ROUND_POWER_OF_TWO(.., 4) before being narrowed into *sum and *sse.
// NOTE(review): the shifts presumably rescale 10-bit statistics to the 8-bit
// range (2 extra bits for the sum, 4 for squares) -- confirm against callers.
static inline void highbd_10_obmc_variance(const uint8_t *pre8, int pre_stride,
                                           const int32_t *wsrc,
                                           const int32_t *mask, int w, int h,
                                           unsigned int *sse, int *sum) {
  uint64_t wide_sse;
  int64_t wide_sum;

  highbd_obmc_variance64(pre8, pre_stride, wsrc, mask, w, h, &wide_sse,
                         &wide_sum);
  *sum = (int)ROUND_POWER_OF_TWO(wide_sum, 2);
  *sse = (unsigned int)ROUND_POWER_OF_TWO(wide_sse, 4);
}
925 | | |
// 12-bit wrapper around highbd_obmc_variance64: the 64-bit sum is reduced
// with ROUND_POWER_OF_TWO(.., 4) and the 64-bit sse with
// ROUND_POWER_OF_TWO(.., 8) before being narrowed into *sum and *sse.
// NOTE(review): the shifts presumably rescale 12-bit statistics to the 8-bit
// range (4 extra bits for the sum, 8 for squares) -- confirm against callers.
static inline void highbd_12_obmc_variance(const uint8_t *pre8, int pre_stride,
                                           const int32_t *wsrc,
                                           const int32_t *mask, int w, int h,
                                           unsigned int *sse, int *sum) {
  uint64_t wide_sse;
  int64_t wide_sum;

  highbd_obmc_variance64(pre8, pre_stride, wsrc, mask, w, h, &wide_sse,
                         &wide_sum);
  *sum = (int)ROUND_POWER_OF_TWO(wide_sum, 4);
  *sse = (unsigned int)ROUND_POWER_OF_TWO(wide_sse, 8);
}
936 | | |
// Defines the C reference OBMC variance kernels for a W x H high bit-depth
// block at 8, 10 and 12 bits. Each kernel asks the matching
// highbd_*_obmc_variance helper for the SSE (stored through `sse`) and the
// sum, then returns sse - sum * sum / (W * H).
// The 8-bit kernel performs the final subtraction in unsigned arithmetic
// without clamping; the 10- and 12-bit kernels subtract in 64 bits and clamp
// a negative result to zero.
#define HIGHBD_OBMC_VAR(W, H)                                                 \
  unsigned int aom_highbd_8_obmc_variance##W##x##H##_c(                       \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,                \
      const int32_t *mask, unsigned int *sse) {                               \
    int total;                                                                \
    highbd_obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &total);     \
    const int64_t sq_mean = ((int64_t)total * total) / (W * H);               \
    return *sse - (unsigned int)sq_mean;                                      \
  }                                                                           \
                                                                              \
  unsigned int aom_highbd_10_obmc_variance##W##x##H##_c(                      \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,                \
      const int32_t *mask, unsigned int *sse) {                               \
    int total;                                                                \
    highbd_10_obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &total);  \
    const int64_t sq_mean = ((int64_t)total * total) / (W * H);               \
    const int64_t var = (int64_t)(*sse) - sq_mean;                            \
    if (var < 0) return 0;                                                    \
    return (uint32_t)var;                                                     \
  }                                                                           \
                                                                              \
  unsigned int aom_highbd_12_obmc_variance##W##x##H##_c(                      \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,                \
      const int32_t *mask, unsigned int *sse) {                               \
    int total;                                                                \
    highbd_12_obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &total);  \
    const int64_t sq_mean = ((int64_t)total * total) / (W * H);               \
    const int64_t var = (int64_t)(*sse) - sq_mean;                            \
    if (var < 0) return 0;                                                    \
    return (uint32_t)var;                                                     \
  }
965 | | |
// Defines the C reference OBMC sub-pixel variance kernels for a W x H high
// bit-depth block at 8, 10 and 12 bits.
// Each kernel runs the two bilinear filter passes over `pre`: the first pass
// is invoked with H + 1 and W and the 2-tap filter selected by `xoffset`,
// writing into a (H + 1) * W scratch buffer; the second pass is invoked with
// H and W and the filter selected by `yoffset`, writing into a H * W buffer.
// That buffer (stride W) is then handed to the matching
// aom_highbd_*_obmc_variance W x H kernel, whose result is returned and which
// stores the SSE through `sse`.
#define HIGHBD_OBMC_SUBPIX_VAR(W, H)                                          \
  unsigned int aom_highbd_8_obmc_sub_pixel_variance##W##x##H##_c(             \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,           \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {          \
    uint16_t pass1[(H + 1) * W];                                              \
    uint16_t pass2[H * W];                                                    \
                                                                              \
    aom_highbd_var_filter_block2d_bil_first_pass(                             \
        pre, pass1, pre_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
    aom_highbd_var_filter_block2d_bil_second_pass(                            \
        pass1, pass2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
                                                                              \
    return aom_highbd_8_obmc_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(pass2), \
                                                   W, wsrc, mask, sse);       \
  }                                                                           \
                                                                              \
  unsigned int aom_highbd_10_obmc_sub_pixel_variance##W##x##H##_c(            \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,           \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {          \
    uint16_t pass1[(H + 1) * W];                                              \
    uint16_t pass2[H * W];                                                    \
                                                                              \
    aom_highbd_var_filter_block2d_bil_first_pass(                             \
        pre, pass1, pre_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
    aom_highbd_var_filter_block2d_bil_second_pass(                            \
        pass1, pass2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
                                                                              \
    return aom_highbd_10_obmc_variance##W##x##H##_c(                          \
        CONVERT_TO_BYTEPTR(pass2), W, wsrc, mask, sse);                       \
  }                                                                           \
                                                                              \
  unsigned int aom_highbd_12_obmc_sub_pixel_variance##W##x##H##_c(            \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,           \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {          \
    uint16_t pass1[(H + 1) * W];                                              \
    uint16_t pass2[H * W];                                                    \
                                                                              \
    aom_highbd_var_filter_block2d_bil_first_pass(                             \
        pre, pass1, pre_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
    aom_highbd_var_filter_block2d_bil_second_pass(                            \
        pass1, pass2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
                                                                              \
    return aom_highbd_12_obmc_variance##W##x##H##_c(                          \
        CONVERT_TO_BYTEPTR(pass2), W, wsrc, mask, sse);                       \
  }
1011 | | |
1012 | | HIGHBD_OBMC_VAR(4, 4) |
1013 | | HIGHBD_OBMC_SUBPIX_VAR(4, 4) |
1014 | | |
1015 | | HIGHBD_OBMC_VAR(4, 8) |
1016 | | HIGHBD_OBMC_SUBPIX_VAR(4, 8) |
1017 | | |
1018 | | HIGHBD_OBMC_VAR(8, 4) |
1019 | | HIGHBD_OBMC_SUBPIX_VAR(8, 4) |
1020 | | |
1021 | | HIGHBD_OBMC_VAR(8, 8) |
1022 | | HIGHBD_OBMC_SUBPIX_VAR(8, 8) |
1023 | | |
1024 | | HIGHBD_OBMC_VAR(8, 16) |
1025 | | HIGHBD_OBMC_SUBPIX_VAR(8, 16) |
1026 | | |
1027 | | HIGHBD_OBMC_VAR(16, 8) |
1028 | | HIGHBD_OBMC_SUBPIX_VAR(16, 8) |
1029 | | |
1030 | | HIGHBD_OBMC_VAR(16, 16) |
1031 | | HIGHBD_OBMC_SUBPIX_VAR(16, 16) |
1032 | | |
1033 | | HIGHBD_OBMC_VAR(16, 32) |
1034 | | HIGHBD_OBMC_SUBPIX_VAR(16, 32) |
1035 | | |
1036 | | HIGHBD_OBMC_VAR(32, 16) |
1037 | | HIGHBD_OBMC_SUBPIX_VAR(32, 16) |
1038 | | |
1039 | | HIGHBD_OBMC_VAR(32, 32) |
1040 | | HIGHBD_OBMC_SUBPIX_VAR(32, 32) |
1041 | | |
1042 | | HIGHBD_OBMC_VAR(32, 64) |
1043 | | HIGHBD_OBMC_SUBPIX_VAR(32, 64) |
1044 | | |
1045 | | HIGHBD_OBMC_VAR(64, 32) |
1046 | | HIGHBD_OBMC_SUBPIX_VAR(64, 32) |
1047 | | |
1048 | | HIGHBD_OBMC_VAR(64, 64) |
1049 | | HIGHBD_OBMC_SUBPIX_VAR(64, 64) |
1050 | | |
1051 | | HIGHBD_OBMC_VAR(64, 128) |
1052 | | HIGHBD_OBMC_SUBPIX_VAR(64, 128) |
1053 | | |
1054 | | HIGHBD_OBMC_VAR(128, 64) |
1055 | | HIGHBD_OBMC_SUBPIX_VAR(128, 64) |
1056 | | |
1057 | | HIGHBD_OBMC_VAR(128, 128) |
1058 | | HIGHBD_OBMC_SUBPIX_VAR(128, 128) |
1059 | | |
1060 | | HIGHBD_OBMC_VAR(4, 16) |
1061 | | HIGHBD_OBMC_SUBPIX_VAR(4, 16) |
1062 | | HIGHBD_OBMC_VAR(16, 4) |
1063 | | HIGHBD_OBMC_SUBPIX_VAR(16, 4) |
1064 | | HIGHBD_OBMC_VAR(8, 32) |
1065 | | HIGHBD_OBMC_SUBPIX_VAR(8, 32) |
1066 | | HIGHBD_OBMC_VAR(32, 8) |
1067 | | HIGHBD_OBMC_SUBPIX_VAR(32, 8) |
1068 | | HIGHBD_OBMC_VAR(16, 64) |
1069 | | HIGHBD_OBMC_SUBPIX_VAR(16, 64) |
1070 | | HIGHBD_OBMC_VAR(64, 16) |
1071 | | HIGHBD_OBMC_SUBPIX_VAR(64, 16) |
1072 | | #endif // CONFIG_AV1_HIGHBITDEPTH |
1073 | | #endif // !CONFIG_REALTIME_ONLY |
1074 | | |
// Returns the sum of squared differences between a w x h block of 8-bit
// samples in `dst` (row pitch `dstride`) and the co-located 16-bit samples
// in `src` (row pitch `sstride`). Despite the "mse" in the name, the total is
// not divided by the number of samples.
uint64_t aom_mse_wxh_16bit_c(uint8_t *dst, int dstride, uint16_t *src,
                             int sstride, int w, int h) {
  uint64_t sum = 0;
  for (int i = 0; i < h; i++) {
    for (int j = 0; j < w; j++) {
      const int e = (int)dst[i * dstride + j] - (int)src[i * sstride + j];
      // |e| can reach 65535, and 65535 * 65535 does not fit in a 32-bit int.
      // Widen before multiplying so the square is never computed with
      // overflowing signed arithmetic.
      sum += (uint64_t)((int64_t)e * e);
    }
  }
  return sum;
}
1086 | | |
// Accumulates aom_mse_wxh_16bit_c() over 16 / w blocks of size w x h.
// Successive blocks sit side by side in `dst` (each advances w samples along
// the row, row pitch `dstride`), while in `src` they are stored back to back
// as w * h samples each, with a row pitch of w.
// `w` must be non-zero; the block count is 16 / w (integer division).
uint64_t aom_mse_16xh_16bit_c(uint8_t *dst, int dstride, uint16_t *src, int w,
                              int h) {
  uint16_t *src_temp = src;
  uint8_t *dst_temp = dst;
  const int num_blks = 16 / w;
  // Keep the accumulator unsigned, matching both the callee's result and
  // this function's return type, so no signed/unsigned round trip occurs.
  uint64_t sum = 0;
  for (int i = 0; i < num_blks; i++) {
    sum += aom_mse_wxh_16bit_c(dst_temp, dstride, src_temp, w, w, h);
    dst_temp += w;
    src_temp += (w * h);
  }
  return sum;
}
1100 | | |
1101 | | #if CONFIG_AV1_HIGHBITDEPTH |
// Returns the sum of squared differences between two w x h blocks of 16-bit
// samples, `dst` (row pitch `dstride`) and `src` (row pitch `sstride`).
// Despite the "mse" in the name, the total is not divided by the number of
// samples.
uint64_t aom_mse_wxh_16bit_highbd_c(uint16_t *dst, int dstride, uint16_t *src,
                                    int sstride, int w, int h) {
  uint64_t sum = 0;
  for (int i = 0; i < h; i++) {
    for (int j = 0; j < w; j++) {
      const int e = (int)dst[i * dstride + j] - (int)src[i * sstride + j];
      // |e| can reach 65535, and 65535 * 65535 does not fit in a 32-bit int.
      // Widen before multiplying so the square is never computed with
      // overflowing signed arithmetic.
      sum += (uint64_t)((int64_t)e * e);
    }
  }
  return sum;
}
1113 | | #endif // CONFIG_AV1_HIGHBITDEPTH |