/src/aom/aom_dsp/blend_a64_mask.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2016, Alliance for Open Media. All rights reserved. |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
10 | | */ |
11 | | |
12 | | #include <assert.h> |
13 | | |
14 | | #include "aom/aom_integer.h" |
15 | | #include "aom_ports/mem.h" |
16 | | #include "aom_dsp/blend.h" |
17 | | #include "aom_dsp/aom_dsp_common.h" |
18 | | |
19 | | #include "config/aom_dsp_rtcd.h" |
20 | | |
21 | | // Blending with alpha mask. Mask values come from the range [0, 64], |
22 | | // as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can |
23 | | // be the same as dst, or dst can be different from both sources. |
24 | | |
25 | | // NOTE(rachelbarker): The input and output of aom_blend_a64_d16_mask_c() are |
26 | | // in a higher intermediate precision, and will later be rounded down to pixel |
27 | | // precision. |
28 | | // Thus, in order to avoid double-rounding, we want to use normal right shifts |
29 | | // within this function, not ROUND_POWER_OF_TWO. |
30 | | // This works because of the identity: |
31 | | // ROUND_POWER_OF_TWO(x >> y, z) == ROUND_POWER_OF_TWO(x, y+z) |
32 | | // |
33 | | // In contrast, the output of the non-d16 functions will not be further rounded, |
34 | | // so we *should* use ROUND_POWER_OF_TWO there. |
35 | | |
36 | | void aom_lowbd_blend_a64_d16_mask_c( |
37 | | uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, |
38 | | uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, |
39 | | const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh, |
40 | 0 | ConvolveParams *conv_params) { |
41 | 0 | int i, j; |
42 | 0 | const int bd = 8; |
43 | 0 | const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; |
44 | 0 | const int round_offset = (1 << (offset_bits - conv_params->round_1)) + |
45 | 0 | (1 << (offset_bits - conv_params->round_1 - 1)); |
46 | 0 | const int round_bits = |
47 | 0 | 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; |
48 | |
|
49 | 0 | assert(IMPLIES((void *)src0 == dst, src0_stride == dst_stride)); |
50 | 0 | assert(IMPLIES((void *)src1 == dst, src1_stride == dst_stride)); |
51 | | |
52 | 0 | assert(h >= 4); |
53 | 0 | assert(w >= 4); |
54 | 0 | assert(IS_POWER_OF_TWO(h)); |
55 | 0 | assert(IS_POWER_OF_TWO(w)); |
56 | | |
57 | 0 | if (subw == 0 && subh == 0) { |
58 | 0 | for (i = 0; i < h; ++i) { |
59 | 0 | for (j = 0; j < w; ++j) { |
60 | 0 | int32_t res; |
61 | 0 | const int m = mask[i * mask_stride + j]; |
62 | 0 | res = ((m * (int32_t)src0[i * src0_stride + j] + |
63 | 0 | (AOM_BLEND_A64_MAX_ALPHA - m) * |
64 | 0 | (int32_t)src1[i * src1_stride + j]) >> |
65 | 0 | AOM_BLEND_A64_ROUND_BITS); |
66 | 0 | res -= round_offset; |
67 | 0 | dst[i * dst_stride + j] = |
68 | 0 | clip_pixel(ROUND_POWER_OF_TWO(res, round_bits)); |
69 | 0 | } |
70 | 0 | } |
71 | 0 | } else if (subw == 1 && subh == 1) { |
72 | 0 | for (i = 0; i < h; ++i) { |
73 | 0 | for (j = 0; j < w; ++j) { |
74 | 0 | int32_t res; |
75 | 0 | const int m = ROUND_POWER_OF_TWO( |
76 | 0 | mask[(2 * i) * mask_stride + (2 * j)] + |
77 | 0 | mask[(2 * i + 1) * mask_stride + (2 * j)] + |
78 | 0 | mask[(2 * i) * mask_stride + (2 * j + 1)] + |
79 | 0 | mask[(2 * i + 1) * mask_stride + (2 * j + 1)], |
80 | 0 | 2); |
81 | 0 | res = ((m * (int32_t)src0[i * src0_stride + j] + |
82 | 0 | (AOM_BLEND_A64_MAX_ALPHA - m) * |
83 | 0 | (int32_t)src1[i * src1_stride + j]) >> |
84 | 0 | AOM_BLEND_A64_ROUND_BITS); |
85 | 0 | res -= round_offset; |
86 | 0 | dst[i * dst_stride + j] = |
87 | 0 | clip_pixel(ROUND_POWER_OF_TWO(res, round_bits)); |
88 | 0 | } |
89 | 0 | } |
90 | 0 | } else if (subw == 1 && subh == 0) { |
91 | 0 | for (i = 0; i < h; ++i) { |
92 | 0 | for (j = 0; j < w; ++j) { |
93 | 0 | int32_t res; |
94 | 0 | const int m = AOM_BLEND_AVG(mask[i * mask_stride + (2 * j)], |
95 | 0 | mask[i * mask_stride + (2 * j + 1)]); |
96 | 0 | res = ((m * (int32_t)src0[i * src0_stride + j] + |
97 | 0 | (AOM_BLEND_A64_MAX_ALPHA - m) * |
98 | 0 | (int32_t)src1[i * src1_stride + j]) >> |
99 | 0 | AOM_BLEND_A64_ROUND_BITS); |
100 | 0 | res -= round_offset; |
101 | 0 | dst[i * dst_stride + j] = |
102 | 0 | clip_pixel(ROUND_POWER_OF_TWO(res, round_bits)); |
103 | 0 | } |
104 | 0 | } |
105 | 0 | } else { |
106 | 0 | for (i = 0; i < h; ++i) { |
107 | 0 | for (j = 0; j < w; ++j) { |
108 | 0 | int32_t res; |
109 | 0 | const int m = AOM_BLEND_AVG(mask[(2 * i) * mask_stride + j], |
110 | 0 | mask[(2 * i + 1) * mask_stride + j]); |
111 | 0 | res = ((int32_t)(m * (int32_t)src0[i * src0_stride + j] + |
112 | 0 | (AOM_BLEND_A64_MAX_ALPHA - m) * |
113 | 0 | (int32_t)src1[i * src1_stride + j]) >> |
114 | 0 | AOM_BLEND_A64_ROUND_BITS); |
115 | 0 | res -= round_offset; |
116 | 0 | dst[i * dst_stride + j] = |
117 | 0 | clip_pixel(ROUND_POWER_OF_TWO(res, round_bits)); |
118 | 0 | } |
119 | 0 | } |
120 | 0 | } |
121 | 0 | } |
122 | | |
123 | | #if CONFIG_AV1_HIGHBITDEPTH |
124 | | void aom_highbd_blend_a64_d16_mask_c( |
125 | | uint8_t *dst_8, uint32_t dst_stride, const CONV_BUF_TYPE *src0, |
126 | | uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, |
127 | | const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh, |
128 | 122 | ConvolveParams *conv_params, const int bd) { |
129 | 122 | const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; |
130 | 122 | const int round_offset = (1 << (offset_bits - conv_params->round_1)) + |
131 | 122 | (1 << (offset_bits - conv_params->round_1 - 1)); |
132 | 122 | const int round_bits = |
133 | 122 | 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; |
134 | 122 | uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8); |
135 | | |
136 | 122 | assert(IMPLIES(src0 == dst, src0_stride == dst_stride)); |
137 | 122 | assert(IMPLIES(src1 == dst, src1_stride == dst_stride)); |
138 | | |
139 | 122 | assert(h >= 1); |
140 | 122 | assert(w >= 1); |
141 | 122 | assert(IS_POWER_OF_TWO(h)); |
142 | 122 | assert(IS_POWER_OF_TWO(w)); |
143 | | |
144 | | // excerpt from clip_pixel_highbd() |
145 | | // set saturation_value to (1 << bd) - 1 |
146 | 122 | unsigned int saturation_value; |
147 | 122 | switch (bd) { |
148 | 0 | case 8: |
149 | 0 | default: saturation_value = 255; break; |
150 | 56 | case 10: saturation_value = 1023; break; |
151 | 66 | case 12: saturation_value = 4095; break; |
152 | 122 | } |
153 | | |
154 | 122 | if (subw == 0 && subh == 0) { |
155 | 0 | for (int i = 0; i < h; ++i) { |
156 | 0 | for (int j = 0; j < w; ++j) { |
157 | 0 | int32_t res; |
158 | 0 | const int m = mask[j]; |
159 | 0 | res = ((m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >> |
160 | 0 | AOM_BLEND_A64_ROUND_BITS); |
161 | 0 | res -= round_offset; |
162 | 0 | unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits)); |
163 | 0 | dst[j] = AOMMIN(v, saturation_value); |
164 | 0 | } |
165 | 0 | mask += mask_stride; |
166 | 0 | src0 += src0_stride; |
167 | 0 | src1 += src1_stride; |
168 | 0 | dst += dst_stride; |
169 | 0 | } |
170 | 122 | } else if (subw == 1 && subh == 1) { |
171 | 0 | for (int i = 0; i < h; ++i) { |
172 | 0 | for (int j = 0; j < w; ++j) { |
173 | 0 | int32_t res; |
174 | 0 | const int m = ROUND_POWER_OF_TWO( |
175 | 0 | mask[2 * j] + mask[mask_stride + 2 * j] + mask[2 * j + 1] + |
176 | 0 | mask[mask_stride + 2 * j + 1], |
177 | 0 | 2); |
178 | 0 | res = (m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >> |
179 | 0 | AOM_BLEND_A64_ROUND_BITS; |
180 | 0 | res -= round_offset; |
181 | 0 | unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits)); |
182 | 0 | dst[j] = AOMMIN(v, saturation_value); |
183 | 0 | } |
184 | 0 | mask += 2 * mask_stride; |
185 | 0 | src0 += src0_stride; |
186 | 0 | src1 += src1_stride; |
187 | 0 | dst += dst_stride; |
188 | 0 | } |
189 | 122 | } else if (subw == 1 && subh == 0) { |
190 | 2.10k | for (int i = 0; i < h; ++i) { |
191 | 56.8k | for (int j = 0; j < w; ++j) { |
192 | 54.9k | int32_t res; |
193 | 54.9k | const int m = AOM_BLEND_AVG(mask[2 * j], mask[2 * j + 1]); |
194 | 54.9k | res = (m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >> |
195 | 54.9k | AOM_BLEND_A64_ROUND_BITS; |
196 | 54.9k | res -= round_offset; |
197 | 54.9k | unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits)); |
198 | 54.9k | dst[j] = AOMMIN(v, saturation_value); |
199 | 54.9k | } |
200 | 1.98k | mask += mask_stride; |
201 | 1.98k | src0 += src0_stride; |
202 | 1.98k | src1 += src1_stride; |
203 | 1.98k | dst += dst_stride; |
204 | 1.98k | } |
205 | 122 | } else { |
206 | 0 | for (int i = 0; i < h; ++i) { |
207 | 0 | for (int j = 0; j < w; ++j) { |
208 | 0 | int32_t res; |
209 | 0 | const int m = AOM_BLEND_AVG(mask[j], mask[mask_stride + j]); |
210 | 0 | res = (m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >> |
211 | 0 | AOM_BLEND_A64_ROUND_BITS; |
212 | 0 | res -= round_offset; |
213 | 0 | unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits)); |
214 | 0 | dst[j] = AOMMIN(v, saturation_value); |
215 | 0 | } |
216 | 0 | mask += 2 * mask_stride; |
217 | 0 | src0 += src0_stride; |
218 | 0 | src1 += src1_stride; |
219 | 0 | dst += dst_stride; |
220 | 0 | } |
221 | 0 | } |
222 | 122 | } |
223 | | #endif // CONFIG_AV1_HIGHBITDEPTH |
224 | | |
225 | | // Blending with alpha mask. Mask values come from the range [0, 64], |
226 | | // as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can |
227 | | // be the same as dst, or dst can be different from both sources. |
228 | | |
// Blend two 8-bit pixel blocks into dst using an alpha mask with values in
// [0, AOM_BLEND_A64_MAX_ALPHA]. The mask may be at 1x or 2x the output
// resolution in each dimension (selected by subw/subh). src0 or src1 may
// alias dst.
void aom_blend_a64_mask_c(uint8_t *dst, uint32_t dst_stride,
                          const uint8_t *src0, uint32_t src0_stride,
                          const uint8_t *src1, uint32_t src1_stride,
                          const uint8_t *mask, uint32_t mask_stride, int w,
                          int h, int subw, int subh) {
  assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
  assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

  assert(h >= 1);
  assert(w >= 1);
  assert(IS_POWER_OF_TWO(h));
  assert(IS_POWER_OF_TWO(w));

  if (subw == 0 && subh == 0) {
    // Mask matches the output resolution: one mask value per pixel.
    for (int row = 0; row < h; ++row) {
      for (int col = 0; col < w; ++col) {
        const int m = mask[col];
        dst[col] = AOM_BLEND_A64(m, src0[col], src1[col]);
      }
      dst += dst_stride;
      src0 += src0_stride;
      src1 += src1_stride;
      mask += mask_stride;
    }
  } else if (subw == 1 && subh == 1) {
    // Mask at 2x resolution both ways: average each 2x2 group of values.
    for (int row = 0; row < h; ++row) {
      for (int col = 0; col < w; ++col) {
        const int m = ROUND_POWER_OF_TWO(
            mask[2 * col] + mask[mask_stride + 2 * col] + mask[2 * col + 1] +
                mask[mask_stride + 2 * col + 1],
            2);
        dst[col] = AOM_BLEND_A64(m, src0[col], src1[col]);
      }
      dst += dst_stride;
      src0 += src0_stride;
      src1 += src1_stride;
      mask += 2 * mask_stride;
    }
  } else if (subw == 1 && subh == 0) {
    // Mask at 2x resolution horizontally: average horizontal pairs.
    for (int row = 0; row < h; ++row) {
      for (int col = 0; col < w; ++col) {
        const int m = AOM_BLEND_AVG(mask[2 * col], mask[2 * col + 1]);
        dst[col] = AOM_BLEND_A64(m, src0[col], src1[col]);
      }
      dst += dst_stride;
      src0 += src0_stride;
      src1 += src1_stride;
      mask += mask_stride;
    }
  } else {
    // subw == 0 && subh == 1: mask at 2x resolution vertically.
    for (int row = 0; row < h; ++row) {
      for (int col = 0; col < w; ++col) {
        const int m = AOM_BLEND_AVG(mask[col], mask[mask_stride + col]);
        dst[col] = AOM_BLEND_A64(m, src0[col], src1[col]);
      }
      dst += dst_stride;
      src0 += src0_stride;
      src1 += src1_stride;
      mask += 2 * mask_stride;
    }
  }
}
285 | | |
286 | | #if CONFIG_AV1_HIGHBITDEPTH |
// High-bit-depth counterpart of aom_blend_a64_mask_c(): blends two blocks of
// 16-bit pixels (passed via CONVERT_TO_SHORTPTR-style uint8_t pointers) using
// an alpha mask in [0, AOM_BLEND_A64_MAX_ALPHA], optionally subsampled by
// subw/subh. The bd argument is only checked via assert here.
void aom_highbd_blend_a64_mask_c(uint8_t *dst_8, uint32_t dst_stride,
                                 const uint8_t *src0_8, uint32_t src0_stride,
                                 const uint8_t *src1_8, uint32_t src1_stride,
                                 const uint8_t *mask, uint32_t mask_stride,
                                 int w, int h, int subw, int subh, int bd) {
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8);
  const uint16_t *src0 = CONVERT_TO_SHORTPTR(src0_8);
  const uint16_t *src1 = CONVERT_TO_SHORTPTR(src1_8);
  (void)bd;

  assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
  assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

  assert(h >= 1);
  assert(w >= 1);
  assert(IS_POWER_OF_TWO(h));
  assert(IS_POWER_OF_TWO(w));

  assert(bd == 8 || bd == 10 || bd == 12);

  if (subw == 0 && subh == 0) {
    // Mask matches the output resolution: one mask value per pixel.
    for (int row = 0; row < h; ++row) {
      for (int col = 0; col < w; ++col) {
        const int m = mask[col];
        dst[col] = AOM_BLEND_A64(m, src0[col], src1[col]);
      }
      dst += dst_stride;
      src0 += src0_stride;
      src1 += src1_stride;
      mask += mask_stride;
    }
  } else if (subw == 1 && subh == 1) {
    // Mask at 2x resolution both ways: average each 2x2 group of values.
    for (int row = 0; row < h; ++row) {
      for (int col = 0; col < w; ++col) {
        const int m = ROUND_POWER_OF_TWO(
            mask[2 * col] + mask[mask_stride + 2 * col] + mask[2 * col + 1] +
                mask[mask_stride + 2 * col + 1],
            2);
        dst[col] = AOM_BLEND_A64(m, src0[col], src1[col]);
      }
      dst += dst_stride;
      src0 += src0_stride;
      src1 += src1_stride;
      mask += 2 * mask_stride;
    }
  } else if (subw == 1 && subh == 0) {
    // Mask at 2x resolution horizontally: average horizontal pairs.
    for (int row = 0; row < h; ++row) {
      for (int col = 0; col < w; ++col) {
        const int m = AOM_BLEND_AVG(mask[2 * col], mask[2 * col + 1]);
        dst[col] = AOM_BLEND_A64(m, src0[col], src1[col]);
      }
      dst += dst_stride;
      src0 += src0_stride;
      src1 += src1_stride;
      mask += mask_stride;
    }
  } else {
    // subw == 0 && subh == 1: mask at 2x resolution vertically.
    for (int row = 0; row < h; ++row) {
      for (int col = 0; col < w; ++col) {
        const int m = AOM_BLEND_AVG(mask[col], mask[mask_stride + col]);
        dst[col] = AOM_BLEND_A64(m, src0[col], src1[col]);
      }
      dst += dst_stride;
      src0 += src0_stride;
      src1 += src1_stride;
      mask += 2 * mask_stride;
    }
  }
}
349 | | #endif // CONFIG_AV1_HIGHBITDEPTH |