/src/libvpx/vpx_dsp/vpx_convolve.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2013 The WebM project authors. All Rights Reserved. |
3 | | * |
4 | | * Use of this source code is governed by a BSD-style license |
5 | | * that can be found in the LICENSE file in the root of the source |
6 | | * tree. An additional intellectual property rights grant can be found |
7 | | * in the file PATENTS. All contributing project authors may |
8 | | * be found in the AUTHORS file in the root of the source tree. |
9 | | */ |
10 | | |
11 | | #include <assert.h> |
12 | | #include <string.h> |
13 | | |
14 | | #include "./vpx_config.h" |
15 | | #include "./vpx_dsp_rtcd.h" |
16 | | #include "vpx/vpx_integer.h" |
17 | | #include "vpx_dsp/vpx_convolve.h" |
18 | | #include "vpx_dsp/vpx_dsp_common.h" |
19 | | #include "vpx_dsp/vpx_filter.h" |
20 | | #include "vpx_ports/mem.h" |
21 | | |
22 | | static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride, |
23 | | uint8_t *dst, ptrdiff_t dst_stride, |
24 | | const InterpKernel *x_filters, int x0_q4, |
25 | 0 | int x_step_q4, int w, int h) { |
26 | 0 | int x, y; |
27 | 0 | src -= SUBPEL_TAPS / 2 - 1; |
28 | |
|
29 | 0 | for (y = 0; y < h; ++y) { |
30 | 0 | int x_q4 = x0_q4; |
31 | 0 | for (x = 0; x < w; ++x) { |
32 | 0 | const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; |
33 | 0 | const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; |
34 | 0 | int k, sum = 0; |
35 | 0 | for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_x[k] * x_filter[k]; |
36 | 0 | dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); |
37 | 0 | x_q4 += x_step_q4; |
38 | 0 | } |
39 | 0 | src += src_stride; |
40 | 0 | dst += dst_stride; |
41 | 0 | } |
42 | 0 | } |
43 | | |
44 | | static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride, |
45 | | uint8_t *dst, ptrdiff_t dst_stride, |
46 | | const InterpKernel *x_filters, int x0_q4, |
47 | 0 | int x_step_q4, int w, int h) { |
48 | 0 | int x, y; |
49 | 0 | src -= SUBPEL_TAPS / 2 - 1; |
50 | |
|
51 | 0 | for (y = 0; y < h; ++y) { |
52 | 0 | int x_q4 = x0_q4; |
53 | 0 | for (x = 0; x < w; ++x) { |
54 | 0 | const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; |
55 | 0 | const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; |
56 | 0 | int k, sum = 0; |
57 | 0 | for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_x[k] * x_filter[k]; |
58 | 0 | dst[x] = ROUND_POWER_OF_TWO( |
59 | 0 | dst[x] + clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1); |
60 | 0 | x_q4 += x_step_q4; |
61 | 0 | } |
62 | 0 | src += src_stride; |
63 | 0 | dst += dst_stride; |
64 | 0 | } |
65 | 0 | } |
66 | | |
// Vertical 8-tap sub-pixel filter (C reference). The outer loop walks
// columns; y_q4 accumulates the subpel position down each column — its
// integer part selects the starting source row of the 8-row window, its
// fractional part (y_q4 & SUBPEL_MASK) selects the kernel.
static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride,
                          uint8_t *dst, ptrdiff_t dst_stride,
                          const InterpKernel *y_filters, int y0_q4,
                          int y_step_q4, int w, int h) {
  int x, y;
  // Step back by the filter's leading taps (rows, hence * src_stride).
  src -= src_stride * (SUBPEL_TAPS / 2 - 1);

  for (x = 0; x < w; ++x) {
    int y_q4 = y0_q4;
    for (y = 0; y < h; ++y) {
      const uint8_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
      const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
      int k, sum = 0;
      for (k = 0; k < SUBPEL_TAPS; ++k)
        sum += src_y[k * src_stride] * y_filter[k];
      dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
      y_q4 += y_step_q4;
    }
    // Advance both cursors one column to the right.
    ++src;
    ++dst;
  }
}
89 | | |
// Vertical 8-tap sub-pixel filter that averages the filtered result into the
// existing destination: dst = (dst + filtered + 1) >> 1. Column-major walk
// identical to convolve_vert.
static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride,
                              uint8_t *dst, ptrdiff_t dst_stride,
                              const InterpKernel *y_filters, int y0_q4,
                              int y_step_q4, int w, int h) {
  int x, y;
  // Step back by the filter's leading taps (rows, hence * src_stride).
  src -= src_stride * (SUBPEL_TAPS / 2 - 1);

  for (x = 0; x < w; ++x) {
    int y_q4 = y0_q4;
    for (y = 0; y < h; ++y) {
      const uint8_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
      const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
      int k, sum = 0;
      for (k = 0; k < SUBPEL_TAPS; ++k)
        sum += src_y[k * src_stride] * y_filter[k];
      // Round the clipped filter output into the existing prediction.
      dst[y * dst_stride] = ROUND_POWER_OF_TWO(
          dst[y * dst_stride] +
              clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)),
          1);
      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}
115 | | |
// Public entry point: horizontal-only 8-tap filtering. The vertical subpel
// parameters are part of the common convolve signature but unused here.
void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
                           const InterpKernel *filter, int x0_q4, int x_step_q4,
                           int y0_q4, int y_step_q4, int w, int h) {
  (void)y0_q4;
  (void)y_step_q4;
  convolve_horiz(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, w,
                 h);
}
125 | | |
// Public entry point: horizontal-only 8-tap filtering, averaged into dst.
// Vertical subpel parameters are unused.
void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
                               const InterpKernel *filter, int x0_q4,
                               int x_step_q4, int y0_q4, int y_step_q4, int w,
                               int h) {
  (void)y0_q4;
  (void)y_step_q4;
  convolve_avg_horiz(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4,
                     w, h);
}
136 | | |
// Public entry point: vertical-only 8-tap filtering. Horizontal subpel
// parameters are unused.
void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                          uint8_t *dst, ptrdiff_t dst_stride,
                          const InterpKernel *filter, int x0_q4, int x_step_q4,
                          int y0_q4, int y_step_q4, int w, int h) {
  (void)x0_q4;
  (void)x_step_q4;
  convolve_vert(src, src_stride, dst, dst_stride, filter, y0_q4, y_step_q4, w,
                h);
}
146 | | |
// Public entry point: vertical-only 8-tap filtering, averaged into dst.
// Horizontal subpel parameters are unused.
void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                              uint8_t *dst, ptrdiff_t dst_stride,
                              const InterpKernel *filter, int x0_q4,
                              int x_step_q4, int y0_q4, int y_step_q4, int w,
                              int h) {
  (void)x0_q4;
  (void)x_step_q4;
  convolve_avg_vert(src, src_stride, dst, dst_stride, filter, y0_q4, y_step_q4,
                    w, h);
}
157 | | |
// Full 2-D 8-tap sub-pixel filter: horizontal pass into a fixed intermediate
// buffer, then vertical pass into dst. The intermediate height must cover the
// vertical filter's tap footprint for every output row.
void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                     ptrdiff_t dst_stride, const InterpKernel *filter,
                     int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w,
                     int h) {
  // Note: Fixed size intermediate buffer, temp, places limits on parameters.
  // 2d filtering proceeds in 2 steps:
  //   (1) Interpolate horizontally into an intermediate buffer, temp.
  //   (2) Interpolate temp vertically to derive the sub-pixel result.
  // Deriving the maximum number of rows in the temp buffer (135):
  // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
  // --Largest block size is 64x64 pixels.
  // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
  //   original frame (in 1/16th pixel units).
  // --Must round-up because block may be located at sub-pixel position.
  // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
  // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
  // When calling in frame scaling function, the smallest scaling factor is x1/4
  // ==> y_step_q4 = 64. Since w and h are at most 16, the temp buffer is still
  // big enough.
  uint8_t temp[64 * 135];
  const int intermediate_height =
      (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;

  assert(w <= 64);
  assert(h <= 64);
  assert(y_step_q4 <= 32 || (y_step_q4 <= 64 && h <= 32));
  assert(x_step_q4 <= 64);

  // Horizontal pass starts SUBPEL_TAPS/2 - 1 rows above the block so the
  // vertical pass has its leading taps available in temp.
  convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64,
                 filter, x0_q4, x_step_q4, w, intermediate_height);
  // Vertical pass re-centers on the block's first row inside temp.
  convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, filter,
                y0_q4, y_step_q4, w, h);
}
191 | | |
// Full 2-D 8-tap filter averaged into dst: filter into a scratch buffer with
// vpx_convolve8_c, then average scratch and dst pixel-wise.
void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                         ptrdiff_t dst_stride, const InterpKernel *filter,
                         int x0_q4, int x_step_q4, int y0_q4, int y_step_q4,
                         int w, int h) {
  // Fixed size intermediate buffer places limits on parameters.
  DECLARE_ALIGNED(16, uint8_t, temp[64 * 64]);
  assert(w <= 64);
  assert(h <= 64);

  vpx_convolve8_c(src, src_stride, temp, 64, filter, x0_q4, x_step_q4, y0_q4,
                  y_step_q4, w, h);
  // Filter arguments are irrelevant to the averaging pass.
  vpx_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, 0, 0, 0, w, h);
}
205 | | |
206 | | void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, |
207 | | ptrdiff_t dst_stride, const InterpKernel *filter, |
208 | | int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, |
209 | 0 | int w, int h) { |
210 | 0 | int r; |
211 | |
|
212 | 0 | (void)filter; |
213 | 0 | (void)x0_q4; |
214 | 0 | (void)x_step_q4; |
215 | 0 | (void)y0_q4; |
216 | 0 | (void)y_step_q4; |
217 | |
|
218 | 0 | for (r = h; r > 0; --r) { |
219 | 0 | memcpy(dst, src, w); |
220 | 0 | src += src_stride; |
221 | 0 | dst += dst_stride; |
222 | 0 | } |
223 | 0 | } |
224 | | |
225 | | void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, |
226 | | ptrdiff_t dst_stride, const InterpKernel *filter, |
227 | | int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, |
228 | 0 | int w, int h) { |
229 | 0 | int x, y; |
230 | |
|
231 | 0 | (void)filter; |
232 | 0 | (void)x0_q4; |
233 | 0 | (void)x_step_q4; |
234 | 0 | (void)y0_q4; |
235 | 0 | (void)y_step_q4; |
236 | |
|
237 | 0 | for (y = 0; y < h; ++y) { |
238 | 0 | for (x = 0; x < w; ++x) dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1); |
239 | 0 | src += src_stride; |
240 | 0 | dst += dst_stride; |
241 | 0 | } |
242 | 0 | } |
243 | | |
// Scaled-prediction alias: horizontal filtering uses the same C path as the
// unscaled entry point.
void vpx_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                        ptrdiff_t dst_stride, const InterpKernel *filter,
                        int x0_q4, int x_step_q4, int y0_q4, int y_step_q4,
                        int w, int h) {
  vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter, x0_q4,
                        x_step_q4, y0_q4, y_step_q4, w, h);
}
251 | | |
// Scaled-prediction alias: vertical filtering uses the same C path as the
// unscaled entry point.
void vpx_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                       ptrdiff_t dst_stride, const InterpKernel *filter,
                       int x0_q4, int x_step_q4, int y0_q4, int y_step_q4,
                       int w, int h) {
  vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter, x0_q4,
                       x_step_q4, y0_q4, y_step_q4, w, h);
}
259 | | |
// Scaled-prediction alias: 2-D filtering uses the same C path as the unscaled
// entry point.
void vpx_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                     ptrdiff_t dst_stride, const InterpKernel *filter,
                     int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w,
                     int h) {
  vpx_convolve8_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4,
                  y0_q4, y_step_q4, w, h);
}
267 | | |
// Scaled-prediction alias: horizontal filtering averaged into dst.
void vpx_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                            uint8_t *dst, ptrdiff_t dst_stride,
                            const InterpKernel *filter, int x0_q4,
                            int x_step_q4, int y0_q4, int y_step_q4, int w,
                            int h) {
  vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter, x0_q4,
                            x_step_q4, y0_q4, y_step_q4, w, h);
}
276 | | |
// Scaled-prediction alias: vertical filtering averaged into dst.
void vpx_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
                           const InterpKernel *filter, int x0_q4, int x_step_q4,
                           int y0_q4, int y_step_q4, int w, int h) {
  vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter, x0_q4,
                           x_step_q4, y0_q4, y_step_q4, w, h);
}
284 | | |
// Scaled-prediction alias: 2-D filtering averaged into dst.
void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                         ptrdiff_t dst_stride, const InterpKernel *filter,
                         int x0_q4, int x_step_q4, int y0_q4, int y_step_q4,
                         int w, int h) {
  vpx_convolve8_avg_c(src, src_stride, dst, dst_stride, filter, x0_q4,
                      x_step_q4, y0_q4, y_step_q4, w, h);
}
292 | | |
293 | | #if CONFIG_VP9_HIGHBITDEPTH |
// High-bitdepth horizontal 8-tap sub-pixel filter. Identical stepping to the
// 8-bit convolve_horiz, but pixels are uint16_t and the result is clipped to
// `bd` bits via clip_pixel_highbd.
static void highbd_convolve_horiz(const uint16_t *src, ptrdiff_t src_stride,
                                  uint16_t *dst, ptrdiff_t dst_stride,
                                  const InterpKernel *x_filters, int x0_q4,
                                  int x_step_q4, int w, int h, int bd) {
  int x, y;
  // Step back so the window covers the filter's leading taps.
  src -= SUBPEL_TAPS / 2 - 1;

  for (y = 0; y < h; ++y) {
    int x_q4 = x0_q4;
    for (x = 0; x < w; ++x) {
      const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
      const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
      int k, sum = 0;
      for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_x[k] * x_filter[k];
      dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}
315 | | |
// High-bitdepth horizontal 8-tap filter averaged into dst:
// dst = (dst + filtered + 1) >> 1, with the filtered value clipped to `bd`
// bits first.
static void highbd_convolve_avg_horiz(const uint16_t *src, ptrdiff_t src_stride,
                                      uint16_t *dst, ptrdiff_t dst_stride,
                                      const InterpKernel *x_filters, int x0_q4,
                                      int x_step_q4, int w, int h, int bd) {
  int x, y;
  // Step back so the window covers the filter's leading taps.
  src -= SUBPEL_TAPS / 2 - 1;

  for (y = 0; y < h; ++y) {
    int x_q4 = x0_q4;
    for (x = 0; x < w; ++x) {
      const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
      const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
      int k, sum = 0;
      for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_x[k] * x_filter[k];
      dst[x] = ROUND_POWER_OF_TWO(
          dst[x] + clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
          1);
      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}
339 | | |
// High-bitdepth vertical 8-tap sub-pixel filter. Column-major walk identical
// to the 8-bit convolve_vert; result clipped to `bd` bits.
static void highbd_convolve_vert(const uint16_t *src, ptrdiff_t src_stride,
                                 uint16_t *dst, ptrdiff_t dst_stride,
                                 const InterpKernel *y_filters, int y0_q4,
                                 int y_step_q4, int w, int h, int bd) {
  int x, y;
  // Step back by the filter's leading taps (rows, hence * src_stride).
  src -= src_stride * (SUBPEL_TAPS / 2 - 1);

  for (x = 0; x < w; ++x) {
    int y_q4 = y0_q4;
    for (y = 0; y < h; ++y) {
      const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
      const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
      int k, sum = 0;
      for (k = 0; k < SUBPEL_TAPS; ++k)
        sum += src_y[k * src_stride] * y_filter[k];
      dst[y * dst_stride] =
          clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}
363 | | |
// High-bitdepth vertical 8-tap filter averaged into dst:
// dst = (dst + filtered + 1) >> 1, filtered value clipped to `bd` bits first.
static void highbd_convolve_avg_vert(const uint16_t *src, ptrdiff_t src_stride,
                                     uint16_t *dst, ptrdiff_t dst_stride,
                                     const InterpKernel *y_filters, int y0_q4,
                                     int y_step_q4, int w, int h, int bd) {
  int x, y;
  // Step back by the filter's leading taps (rows, hence * src_stride).
  src -= src_stride * (SUBPEL_TAPS / 2 - 1);

  for (x = 0; x < w; ++x) {
    int y_q4 = y0_q4;
    for (y = 0; y < h; ++y) {
      const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
      const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
      int k, sum = 0;
      for (k = 0; k < SUBPEL_TAPS; ++k)
        sum += src_y[k * src_stride] * y_filter[k];
      dst[y * dst_stride] = ROUND_POWER_OF_TWO(
          dst[y * dst_stride] +
              clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
          1);
      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}
389 | | |
390 | | static void highbd_convolve(const uint16_t *src, ptrdiff_t src_stride, |
391 | | uint16_t *dst, ptrdiff_t dst_stride, |
392 | | const InterpKernel *filter, int x0_q4, |
393 | | int x_step_q4, int y0_q4, int y_step_q4, int w, |
394 | 0 | int h, int bd) { |
395 | | // Note: Fixed size intermediate buffer, temp, places limits on parameters. |
396 | | // 2d filtering proceeds in 2 steps: |
397 | | // (1) Interpolate horizontally into an intermediate buffer, temp. |
398 | | // (2) Interpolate temp vertically to derive the sub-pixel result. |
399 | | // Deriving the maximum number of rows in the temp buffer (135): |
400 | | // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative). |
401 | | // --Largest block size is 64x64 pixels. |
402 | | // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the |
403 | | // original frame (in 1/16th pixel units). |
404 | | // --Must round-up because block may be located at sub-pixel position. |
405 | | // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. |
406 | | // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. |
407 | 0 | uint16_t temp[64 * 135]; |
408 | 0 | const int intermediate_height = |
409 | 0 | (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS; |
410 | |
|
411 | 0 | assert(w <= 64); |
412 | 0 | assert(h <= 64); |
413 | 0 | assert(y_step_q4 <= 32); |
414 | 0 | assert(x_step_q4 <= 32); |
415 | |
|
416 | 0 | highbd_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, |
417 | 0 | temp, 64, filter, x0_q4, x_step_q4, w, |
418 | 0 | intermediate_height, bd); |
419 | 0 | highbd_convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, |
420 | 0 | filter, y0_q4, y_step_q4, w, h, bd); |
421 | 0 | } |
422 | | |
// High-bitdepth public entry: horizontal-only filtering. Vertical subpel
// parameters are unused.
void vpx_highbd_convolve8_horiz_c(const uint16_t *src, ptrdiff_t src_stride,
                                  uint16_t *dst, ptrdiff_t dst_stride,
                                  const InterpKernel *filter, int x0_q4,
                                  int x_step_q4, int y0_q4, int y_step_q4,
                                  int w, int h, int bd) {
  (void)y0_q4;
  (void)y_step_q4;

  highbd_convolve_horiz(src, src_stride, dst, dst_stride, filter, x0_q4,
                        x_step_q4, w, h, bd);
}
434 | | |
// High-bitdepth public entry: horizontal-only filtering averaged into dst.
// Vertical subpel parameters are unused.
void vpx_highbd_convolve8_avg_horiz_c(const uint16_t *src, ptrdiff_t src_stride,
                                      uint16_t *dst, ptrdiff_t dst_stride,
                                      const InterpKernel *filter, int x0_q4,
                                      int x_step_q4, int y0_q4, int y_step_q4,
                                      int w, int h, int bd) {
  (void)y0_q4;
  (void)y_step_q4;

  highbd_convolve_avg_horiz(src, src_stride, dst, dst_stride, filter, x0_q4,
                            x_step_q4, w, h, bd);
}
446 | | |
// High-bitdepth public entry: vertical-only filtering. Horizontal subpel
// parameters are unused.
void vpx_highbd_convolve8_vert_c(const uint16_t *src, ptrdiff_t src_stride,
                                 uint16_t *dst, ptrdiff_t dst_stride,
                                 const InterpKernel *filter, int x0_q4,
                                 int x_step_q4, int y0_q4, int y_step_q4, int w,
                                 int h, int bd) {
  (void)x0_q4;
  (void)x_step_q4;

  highbd_convolve_vert(src, src_stride, dst, dst_stride, filter, y0_q4,
                       y_step_q4, w, h, bd);
}
458 | | |
// High-bitdepth public entry: vertical-only filtering averaged into dst.
// Horizontal subpel parameters are unused.
void vpx_highbd_convolve8_avg_vert_c(const uint16_t *src, ptrdiff_t src_stride,
                                     uint16_t *dst, ptrdiff_t dst_stride,
                                     const InterpKernel *filter, int x0_q4,
                                     int x_step_q4, int y0_q4, int y_step_q4,
                                     int w, int h, int bd) {
  (void)x0_q4;
  (void)x_step_q4;

  highbd_convolve_avg_vert(src, src_stride, dst, dst_stride, filter, y0_q4,
                           y_step_q4, w, h, bd);
}
470 | | |
// High-bitdepth public entry: full 2-D filtering (delegates to
// highbd_convolve).
void vpx_highbd_convolve8_c(const uint16_t *src, ptrdiff_t src_stride,
                            uint16_t *dst, ptrdiff_t dst_stride,
                            const InterpKernel *filter, int x0_q4,
                            int x_step_q4, int y0_q4, int y_step_q4, int w,
                            int h, int bd) {
  highbd_convolve(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4,
                  y0_q4, y_step_q4, w, h, bd);
}
479 | | |
// High-bitdepth full 2-D filter averaged into dst: filter into a scratch
// buffer, then average scratch and dst pixel-wise.
void vpx_highbd_convolve8_avg_c(const uint16_t *src, ptrdiff_t src_stride,
                                uint16_t *dst, ptrdiff_t dst_stride,
                                const InterpKernel *filter, int x0_q4,
                                int x_step_q4, int y0_q4, int y_step_q4, int w,
                                int h, int bd) {
  // Fixed size intermediate buffer places limits on parameters.
  DECLARE_ALIGNED(16, uint16_t, temp[64 * 64]);
  assert(w <= 64);
  assert(h <= 64);

  vpx_highbd_convolve8_c(src, src_stride, temp, 64, filter, x0_q4, x_step_q4,
                         y0_q4, y_step_q4, w, h, bd);
  // Filter arguments are irrelevant to the averaging pass.
  vpx_highbd_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, 0, 0, 0, w, h,
                            bd);
}
495 | | |
496 | | void vpx_highbd_convolve_copy_c(const uint16_t *src, ptrdiff_t src_stride, |
497 | | uint16_t *dst, ptrdiff_t dst_stride, |
498 | | const InterpKernel *filter, int x0_q4, |
499 | | int x_step_q4, int y0_q4, int y_step_q4, int w, |
500 | 0 | int h, int bd) { |
501 | 0 | int r; |
502 | |
|
503 | 0 | (void)filter; |
504 | 0 | (void)x0_q4; |
505 | 0 | (void)x_step_q4; |
506 | 0 | (void)y0_q4; |
507 | 0 | (void)y_step_q4; |
508 | 0 | (void)bd; |
509 | |
|
510 | 0 | for (r = h; r > 0; --r) { |
511 | 0 | memcpy(dst, src, w * sizeof(uint16_t)); |
512 | 0 | src += src_stride; |
513 | 0 | dst += dst_stride; |
514 | 0 | } |
515 | 0 | } |
516 | | |
517 | | void vpx_highbd_convolve_avg_c(const uint16_t *src, ptrdiff_t src_stride, |
518 | | uint16_t *dst, ptrdiff_t dst_stride, |
519 | | const InterpKernel *filter, int x0_q4, |
520 | | int x_step_q4, int y0_q4, int y_step_q4, int w, |
521 | 0 | int h, int bd) { |
522 | 0 | int x, y; |
523 | |
|
524 | 0 | (void)filter; |
525 | 0 | (void)x0_q4; |
526 | 0 | (void)x_step_q4; |
527 | 0 | (void)y0_q4; |
528 | 0 | (void)y_step_q4; |
529 | 0 | (void)bd; |
530 | |
|
531 | 0 | for (y = 0; y < h; ++y) { |
532 | 0 | for (x = 0; x < w; ++x) dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1); |
533 | 0 | src += src_stride; |
534 | 0 | dst += dst_stride; |
535 | 0 | } |
536 | 0 | } |
537 | | #endif |