/work/svt-av1/Source/Lib/C_DEFAULT/variance.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2016, Alliance for Open Media. All rights reserved |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license. |
10 | | */ |
11 | | |
12 | | #include <assert.h> |
13 | | #include <stdlib.h> |
14 | | #include <string.h> |
15 | | |
16 | | #include "pcs.h" |
17 | | #include "convolve.h" |
18 | | #include "aom_dsp_rtcd.h" |
19 | | #include "inter_prediction.h" |
20 | | |
21 | | // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal |
22 | | // or vertical direction to produce the filtered output block. Used to implement |
23 | | // the first-pass of 2-D separable filter. |
24 | | // |
25 | | // Produces int16_t output to retain precision for the next pass. Two filter |
26 | | // taps should sum to FILTER_WEIGHT. pixel_step defines whether the filter is |
27 | | // applied horizontally (pixel_step = 1) or vertically (pixel_step = stride). |
28 | | // It defines the offset required to move from one input to the next. |
29 | | static void aom_var_filter_block2d_bil_first_pass_c(const uint8_t* a, uint16_t* b, unsigned int src_pixels_per_line, |
30 | | unsigned int pixel_step, unsigned int output_height, |
31 | 0 | unsigned int output_width, const uint8_t* filter) { |
32 | 0 | unsigned int i, j; |
33 | |
|
34 | 0 | for (i = 0; i < output_height; ++i) { |
35 | 0 | for (j = 0; j < output_width; ++j) { |
36 | 0 | b[j] = ROUND_POWER_OF_TWO((int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS); |
37 | |
|
38 | 0 | ++a; |
39 | 0 | } |
40 | |
|
41 | 0 | a += src_pixels_per_line - output_width; |
42 | 0 | b += output_width; |
43 | 0 | } |
44 | 0 | } |
45 | | |
46 | | // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal |
47 | | // or vertical direction to produce the filtered output block. Used to implement |
48 | | // the second-pass of 2-D separable filter. |
49 | | // |
50 | | // Requires 16-bit input as produced by filter_block2d_bil_first_pass. Two |
51 | | // filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the |
52 | | // filter is applied horizontally (pixel_step = 1) or vertically |
53 | | // (pixel_step = stride). It defines the offset required to move from one input |
54 | | // to the next. Output is 8-bit. |
55 | | static void aom_var_filter_block2d_bil_second_pass_c(const uint16_t* a, uint8_t* b, unsigned int src_pixels_per_line, |
56 | | unsigned int pixel_step, unsigned int output_height, |
57 | 0 | unsigned int output_width, const uint8_t* filter) { |
58 | 0 | unsigned int i, j; |
59 | |
|
60 | 0 | for (i = 0; i < output_height; ++i) { |
61 | 0 | for (j = 0; j < output_width; ++j) { |
62 | 0 | b[j] = ROUND_POWER_OF_TWO((int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS); |
63 | 0 | ++a; |
64 | 0 | } |
65 | |
|
66 | 0 | a += src_pixels_per_line - output_width; |
67 | 0 | b += output_width; |
68 | 0 | } |
69 | 0 | } |
70 | | |
71 | 0 | static INLINE const InterpFilterParams* av1_get_filter(int subpel_search) { |
72 | 0 | assert(subpel_search >= USE_2_TAPS); |
73 | |
|
74 | 0 | switch (subpel_search) { |
75 | 0 | case USE_2_TAPS: |
76 | 0 | return &av1_interp_filter_params_list[BILINEAR]; |
77 | 0 | case USE_4_TAPS: |
78 | 0 | return &av1_interp_4tap[EIGHTTAP_REGULAR]; |
79 | 0 | case USE_8_TAPS: |
80 | 0 | return &av1_interp_filter_params_list[EIGHTTAP_REGULAR]; |
81 | 0 | default: |
82 | 0 | assert(0); |
83 | 0 | return NULL; |
84 | 0 | } |
85 | 0 | } |
86 | | |
87 | | // Get pred block from up-sampled reference. |
88 | | void svt_aom_upsampled_pred_c(MacroBlockD* xd, const struct AV1Common* const cm, int mi_row, int mi_col, |
89 | | const Mv* const mv, uint8_t* comp_pred, int width, int height, int subpel_x_q3, |
90 | 0 | int subpel_y_q3, const uint8_t* ref, int ref_stride, int subpel_search) { |
91 | 0 | (void)xd; |
92 | 0 | (void)cm; |
93 | 0 | (void)mi_row; |
94 | 0 | (void)mi_col; |
95 | 0 | (void)mv; |
96 | 0 | const InterpFilterParams* filter = av1_get_filter(subpel_search); |
97 | 0 | assert(filter != NULL); |
98 | 0 | if (!subpel_x_q3 && !subpel_y_q3) { |
99 | 0 | for (int i = 0; i < height; i++) { |
100 | 0 | svt_memcpy(comp_pred, ref, width * sizeof(*comp_pred)); |
101 | 0 | comp_pred += width; |
102 | 0 | ref += ref_stride; |
103 | 0 | } |
104 | 0 | } else if (!subpel_y_q3) { |
105 | 0 | const int16_t* const kernel = av1_get_interp_filter_subpel_kernel(*filter, subpel_x_q3 << 1); |
106 | 0 | svt_aom_convolve8_horiz_c(ref, ref_stride, comp_pred, width, kernel, 16, NULL, -1, width, height); |
107 | 0 | } else if (!subpel_x_q3) { |
108 | 0 | const int16_t* const kernel = av1_get_interp_filter_subpel_kernel(*filter, subpel_y_q3 << 1); |
109 | 0 | svt_aom_convolve8_vert_c(ref, ref_stride, comp_pred, width, NULL, -1, kernel, 16, width, height); |
110 | 0 | } else { |
111 | 0 | DECLARE_ALIGNED(16, uint8_t, temp[((MAX_SB_SIZE * 2 + 16) + 16) * MAX_SB_SIZE]); |
112 | 0 | const int16_t* const kernel_x = av1_get_interp_filter_subpel_kernel(*filter, subpel_x_q3 << 1); |
113 | 0 | const int16_t* const kernel_y = av1_get_interp_filter_subpel_kernel(*filter, subpel_y_q3 << 1); |
114 | 0 | const int intermediate_height = (((height - 1) * 8 + subpel_y_q3) >> 3) + filter->taps; |
115 | 0 | assert(intermediate_height <= (MAX_SB_SIZE * 2 + 16) + 16); |
116 | 0 | svt_aom_convolve8_horiz_c(ref - ref_stride * ((filter->taps >> 1) - 1), |
117 | 0 | ref_stride, |
118 | 0 | temp, |
119 | 0 | MAX_SB_SIZE, |
120 | 0 | kernel_x, |
121 | 0 | 16, |
122 | 0 | NULL, |
123 | 0 | -1, |
124 | 0 | width, |
125 | 0 | intermediate_height); |
126 | 0 | svt_aom_convolve8_vert_c(temp + MAX_SB_SIZE * ((filter->taps >> 1) - 1), |
127 | 0 | MAX_SB_SIZE, |
128 | 0 | comp_pred, |
129 | 0 | width, |
130 | 0 | NULL, |
131 | 0 | -1, |
132 | 0 | kernel_y, |
133 | 0 | 16, |
134 | 0 | width, |
135 | 0 | height); |
136 | 0 | } |
137 | 0 | } |
138 | | |
// Moved from EbComputeVariance_C.c
//
// Accumulates, over a w x h block of 8-bit samples, the signed sum of the
// per-sample differences (a - b) into *sum and the sum of their squares into
// *sse. Both outputs are overwritten. a_stride / b_stride are the row pitches
// of the two inputs.
static void variance_c(const uint8_t* a, int a_stride, const uint8_t* b, int b_stride, int w, int h, uint32_t* sse,
                       int* sum) {
    int      diff_total = 0;
    uint32_t sq_total   = 0;

    for (int row = 0; row < h; ++row) {
        for (int col = 0; col < w; ++col) {
            const int diff = a[col] - b[col];
            diff_total += diff;
            sq_total += diff * diff;
        }

        a += a_stride;
        b += b_stride;
    }

    *sum = diff_total;
    *sse = sq_total;
}
159 | | |
// Moved from EbComputeVariance_C.c
// TODO: use or implement a simd version of this
//
// Variance of the difference between two w x h blocks of 16-bit samples.
// *sse receives the sum of squared differences; the return value is
// *sse - (sum of differences)^2 / (w * h), truncated to 32 bits.
// w * h must be non-zero.
uint32_t svt_aom_variance_highbd_c(const uint16_t* a, int a_stride, const uint16_t* b, int b_stride, int w, int h,
                                   uint32_t* sse) {
    int      diff_total = 0;
    uint32_t sq_total   = 0;

    for (int row = 0; row < h; ++row) {
        for (int col = 0; col < w; ++col) {
            const int diff = a[col] - b[col];
            diff_total += diff;
            sq_total += diff * diff;
        }

        a += a_stride;
        b += b_stride;
    }

    *sse = sq_total;

    // The square of the sum needs 64 bits; the subtraction is done in int64_t.
    const int64_t sum_squared = (int64_t)diff_total * diff_total;
    return *sse - sum_squared / (w * h);
}
182 | | |
// Moved from EbComputeVariance_C.c
//
// Defines svt_aom_variance<W>x<H>_c(): runs variance_c() over a W x H block,
// leaves the sum of squared differences in *sse and returns
// *sse - (sum of differences)^2 / (W * H).
#define VAR(W, H)                                                                        \
    uint32_t svt_aom_variance##W##x##H##_c(                                              \
        const uint8_t* a, int a_stride, const uint8_t* b, int b_stride, uint32_t* sse) { \
        int diff_total;                                                                  \
        variance_c(a, a_stride, b, b_stride, W, H, sse, &diff_total);                    \
        const int64_t  sum_squared = (int64_t)diff_total * diff_total;                   \
        const uint32_t mean_term   = (uint32_t)(sum_squared / ((W) * (H)));              \
        return *sse - mean_term;                                                         \
    }
191 | | |
// Defines svt_aom_sub_pixel_variance<W>x<H>_c(): bilinearly interpolates `a`
// at (xoffset, yoffset) and returns the variance of the result against `b`.
// The horizontal pass produces H + 1 rows because the vertical pass blends
// each row with the one below it.
#define SUBPIX_VAR(W, H)                                                                                               \
    uint32_t svt_aom_sub_pixel_variance##W##x##H##_c(                                                                  \
        const uint8_t* a, int a_stride, int xoffset, int yoffset, const uint8_t* b, int b_stride, uint32_t* sse) {     \
        uint16_t horiz_pass[((H) + 1) * (W)];                                                                          \
        uint8_t  interp[(H) * (W)];                                                                                    \
                                                                                                                       \
        aom_var_filter_block2d_bil_first_pass_c(a, horiz_pass, a_stride, 1, (H) + 1, W, bilinear_filters_2t[xoffset]); \
        aom_var_filter_block2d_bil_second_pass_c(horiz_pass, interp, W, W, H, W, bilinear_filters_2t[yoffset]);        \
                                                                                                                       \
        return svt_aom_variance##W##x##H##_c(interp, W, b, b_stride, sse);                                             \
    }
203 | | |
204 | | /* All the variance are available in the same sizes. */ |
205 | | #define VARIANCES(W, H) \ |
206 | | VAR(W, H) \ |
207 | | SUBPIX_VAR(W, H) |
208 | | VARIANCES(128, 128) |
209 | | VARIANCES(128, 64) |
210 | | VARIANCES(64, 128) |
211 | | VARIANCES(64, 64) |
212 | | VARIANCES(64, 32) |
213 | | VARIANCES(32, 64) |
214 | | VARIANCES(32, 32) |
215 | | VARIANCES(32, 16) |
216 | | VARIANCES(16, 32) |
217 | | VARIANCES(16, 16) |
218 | | VARIANCES(16, 8) |
219 | | VARIANCES(8, 16) |
220 | | VARIANCES(8, 8) |
221 | | VARIANCES(8, 4) |
222 | | VARIANCES(4, 8) |
223 | | VARIANCES(4, 4) |
224 | | VARIANCES(4, 16) |
225 | | VARIANCES(16, 4) |
226 | | VARIANCES(8, 32) |
227 | | VARIANCES(32, 8) |
228 | | VARIANCES(16, 64) |
229 | | VARIANCES(64, 16) |
230 | | |
231 | | static INLINE void obmc_variance(const uint8_t* pre, int pre_stride, const int32_t* wsrc, const int32_t* mask, int w, |
232 | | int h, unsigned int* sse, int* sum) { |
233 | | int i, j; |
234 | | |
235 | | *sse = 0; |
236 | | *sum = 0; |
237 | | |
238 | | for (i = 0; i < h; i++) { |
239 | | for (j = 0; j < w; j++) { |
240 | | int diff = ROUND_POWER_OF_TWO_SIGNED(wsrc[j] - pre[j] * mask[j], 12); |
241 | | *sum += diff; |
242 | | *sse += diff * diff; |
243 | | } |
244 | | |
245 | | pre += pre_stride; |
246 | | wsrc += w; |
247 | | mask += w; |
248 | | } |
249 | | } |
250 | | |
// Defines svt_aom_obmc_variance<W>x<H>_c(): runs obmc_variance() over a W x H
// block, leaves the sum of squared differences in *sse and returns
// *sse - (sum of differences)^2 / (W * H).
#define OBMC_VAR(W, H)                                                                                     \
    unsigned int svt_aom_obmc_variance##W##x##H##_c(                                                       \
        const uint8_t* pre, int pre_stride, const int32_t* wsrc, const int32_t* mask, unsigned int* sse) { \
        int diff_total;                                                                                    \
        obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &diff_total);                                \
        const int64_t      sum_squared = (int64_t)diff_total * diff_total;                                 \
        const unsigned int mean_term   = (unsigned int)(sum_squared / ((W) * (H)));                        \
        return *sse - mean_term;                                                                           \
    }
258 | | |
// Defines svt_aom_obmc_sub_pixel_variance<W>x<H>_c(): bilinearly interpolates
// `pre` at (xoffset, yoffset) and returns the OBMC variance of the result
// against wsrc / mask. The horizontal pass produces H + 1 rows because the
// vertical pass blends each row with the one below it.
#define OBMC_SUBPIX_VAR(W, H)                                                                                  \
    unsigned int svt_aom_obmc_sub_pixel_variance##W##x##H##_c(const uint8_t* pre,                              \
                                                              int            pre_stride,                       \
                                                              int            xoffset,                          \
                                                              int            yoffset,                          \
                                                              const int32_t* wsrc,                             \
                                                              const int32_t* mask,                             \
                                                              unsigned int*  sse) {                            \
        uint16_t horiz_pass[((H) + 1) * (W)];                                                                  \
        uint8_t  interp[(H) * (W)];                                                                            \
                                                                                                               \
        aom_var_filter_block2d_bil_first_pass_c(                                                               \
            pre, horiz_pass, pre_stride, 1, (H) + 1, W, bilinear_filters_2t[xoffset]);                         \
        aom_var_filter_block2d_bil_second_pass_c(horiz_pass, interp, W, W, H, W, bilinear_filters_2t[yoffset]); \
                                                                                                               \
        return svt_aom_obmc_variance##W##x##H##_c(interp, W, wsrc, mask, sse);                                 \
    }
275 | | |
276 | | OBMC_VAR(4, 4) |
277 | | OBMC_SUBPIX_VAR(4, 4) |
278 | | |
279 | | OBMC_VAR(4, 8) |
280 | | OBMC_SUBPIX_VAR(4, 8) |
281 | | |
282 | | OBMC_VAR(8, 4) |
283 | | OBMC_SUBPIX_VAR(8, 4) |
284 | | |
285 | | OBMC_VAR(8, 8) |
286 | | OBMC_SUBPIX_VAR(8, 8) |
287 | | |
288 | | OBMC_VAR(8, 16) |
289 | | OBMC_SUBPIX_VAR(8, 16) |
290 | | |
291 | | OBMC_VAR(16, 8) |
292 | | OBMC_SUBPIX_VAR(16, 8) |
293 | | |
294 | | OBMC_VAR(16, 16) |
295 | | OBMC_SUBPIX_VAR(16, 16) |
296 | | |
297 | | OBMC_VAR(16, 32) |
298 | | OBMC_SUBPIX_VAR(16, 32) |
299 | | |
300 | | OBMC_VAR(32, 16) |
301 | | OBMC_SUBPIX_VAR(32, 16) |
302 | | |
303 | | OBMC_VAR(32, 32) |
304 | | OBMC_SUBPIX_VAR(32, 32) |
305 | | |
306 | | OBMC_VAR(32, 64) |
307 | | OBMC_SUBPIX_VAR(32, 64) |
308 | | |
309 | | OBMC_VAR(64, 32) |
310 | | OBMC_SUBPIX_VAR(64, 32) |
311 | | |
312 | | OBMC_VAR(64, 64) |
313 | | OBMC_SUBPIX_VAR(64, 64) |
314 | | |
315 | | OBMC_VAR(64, 128) |
316 | | OBMC_SUBPIX_VAR(64, 128) |
317 | | |
318 | | OBMC_VAR(128, 64) |
319 | | OBMC_SUBPIX_VAR(128, 64) |
320 | | |
321 | | OBMC_VAR(128, 128) |
322 | | OBMC_SUBPIX_VAR(128, 128) |
323 | | |
324 | | OBMC_VAR(4, 16) |
325 | | OBMC_SUBPIX_VAR(4, 16) |
326 | | OBMC_VAR(16, 4) |
327 | | OBMC_SUBPIX_VAR(16, 4) |
328 | | OBMC_VAR(8, 32) |
329 | | OBMC_SUBPIX_VAR(8, 32) |
330 | | OBMC_VAR(32, 8) |
331 | | OBMC_SUBPIX_VAR(32, 8) |
332 | | OBMC_VAR(16, 64) |
333 | | OBMC_SUBPIX_VAR(16, 64) |
334 | | OBMC_VAR(64, 16) |
335 | | OBMC_SUBPIX_VAR(64, 16) |
336 | | |
// Sum of squared differences over a 16x16 block of 16-bit samples.
//
// src_ptr and ref_ptr are passed as uint8_t* and converted with
// CONVERT_TO_SHORTPTR before use; the strides are applied to the converted
// uint16_t pointers. The total is accumulated in 64 bits and truncated to
// 32 bits on return.
uint32_t svt_aom_highbd_mse16x16_c(const uint8_t* src_ptr, int32_t source_stride, const uint8_t* ref_ptr,
                                   int32_t recon_stride) {
    const int       block_dim = 16;
    const uint16_t* src_row   = CONVERT_TO_SHORTPTR(src_ptr);
    const uint16_t* ref_row   = CONVERT_TO_SHORTPTR(ref_ptr);
    uint64_t        total_sse = 0;

    for (int row = 0; row < block_dim; ++row) {
        for (int col = 0; col < block_dim; ++col) {
            const int diff = src_row[col] - ref_row[col];
            total_sse += (uint32_t)(diff * diff);
        }

        src_row += source_stride;
        ref_row += recon_stride;
    }

    return (uint32_t)total_sse;
}
352 | 0 | } |