Coverage Report

Created: 2025-06-13 07:07

/src/aom/aom_dsp/blend_a64_mask.c
Line| Count|Source
----+------+------------------------------------------------------------------------
   1|      |/*
   2|      | * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
   3|      | *
   4|      | * This source code is subject to the terms of the BSD 2 Clause License and
   5|      | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
   6|      | * was not distributed with this source code in the LICENSE file, you can
   7|      | * obtain it at www.aomedia.org/license/software. If the Alliance for Open
   8|      | * Media Patent License 1.0 was not distributed with this source code in the
   9|      | * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  10|      | */
  11|      |
  12|      |#include <assert.h>
  13|      |
  14|      |#include "aom/aom_integer.h"
  15|      |#include "aom_ports/mem.h"
  16|      |#include "aom_dsp/blend.h"
  17|      |#include "aom_dsp/aom_dsp_common.h"
  18|      |
  19|      |#include "config/aom_dsp_rtcd.h"
  20|      |
  21|      |// Blending with alpha mask. Mask values come from the range [0, 64],
  22|      |// as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can
  23|      |// be the same as dst, or dst can be different from both sources.
  24|      |
  25|      |// NOTE(rachelbarker): The input and output of aom_blend_a64_d16_mask_c() are
  26|      |// in a higher intermediate precision, and will later be rounded down to pixel
  27|      |// precision.
  28|      |// Thus, in order to avoid double-rounding, we want to use normal right shifts
  29|      |// within this function, not ROUND_POWER_OF_TWO.
  30|      |// This works because of the identity:
  31|      |// ROUND_POWER_OF_TWO(x >> y, z) == ROUND_POWER_OF_TWO(x, y+z)
  32|      |//
  33|      |// In contrast, the output of the non-d16 functions will not be further rounded,
  34|      |// so we *should* use ROUND_POWER_OF_TWO there.
  35|      |
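
The note above leans on that shift identity. A minimal standalone check (illustrative, not part of the library or of this report), assuming ROUND_POWER_OF_TWO(value, n) behaves as ((value) + (1 << ((n) - 1))) >> (n) per aom_dsp/aom_dsp_common.h:

  #include <assert.h>

  /* RPOT is a stand-in for ROUND_POWER_OF_TWO under the assumption above. */
  #define RPOT(v, n) (((v) + (1 << ((n)-1))) >> (n))

  int main(void) {
    const int x = 23, y = 2, z = 2;
    /* Plain shift first, one rounding at the end: matches a single rounded shift. */
    assert(RPOT(x >> y, z) == RPOT(x, y + z));    /* both give 1 */
    /* Rounding at both stages double-rounds and can drift upward. */
    assert(RPOT(RPOT(x, y), z) > RPOT(x, y + z)); /* 2 > 1 */
    return 0;
  }
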
  36|      |void aom_lowbd_blend_a64_d16_mask_c(
  37|      |    uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
  38|      |    uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
  39|      |    const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh,
  40|     0|    ConvolveParams *conv_params) {
  41|     0|  int i, j;
  42|     0|  const int bd = 8;
  43|     0|  const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
  44|     0|  const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
  45|     0|                           (1 << (offset_bits - conv_params->round_1 - 1));
  46|     0|  const int round_bits =
  47|     0|      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
  48|      |
  49|     0|  assert(IMPLIES((void *)src0 == dst, src0_stride == dst_stride));
  50|     0|  assert(IMPLIES((void *)src1 == dst, src1_stride == dst_stride));
  51|      |
  52|     0|  assert(h >= 4);
  53|     0|  assert(w >= 4);
  54|     0|  assert(IS_POWER_OF_TWO(h));
  55|     0|  assert(IS_POWER_OF_TWO(w));
  56|      |
  57|     0|  if (subw == 0 && subh == 0) {
  58|     0|    for (i = 0; i < h; ++i) {
  59|     0|      for (j = 0; j < w; ++j) {
  60|     0|        int32_t res;
  61|     0|        const int m = mask[i * mask_stride + j];
  62|     0|        res = ((m * (int32_t)src0[i * src0_stride + j] +
  63|     0|                (AOM_BLEND_A64_MAX_ALPHA - m) *
  64|     0|                    (int32_t)src1[i * src1_stride + j]) >>
  65|     0|               AOM_BLEND_A64_ROUND_BITS);
  66|     0|        res -= round_offset;
  67|     0|        dst[i * dst_stride + j] =
  68|     0|            clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
  69|     0|      }
  70|     0|    }
  71|     0|  } else if (subw == 1 && subh == 1) {
  72|     0|    for (i = 0; i < h; ++i) {
  73|     0|      for (j = 0; j < w; ++j) {
  74|     0|        int32_t res;
  75|     0|        const int m = ROUND_POWER_OF_TWO(
  76|     0|            mask[(2 * i) * mask_stride + (2 * j)] +
  77|     0|                mask[(2 * i + 1) * mask_stride + (2 * j)] +
  78|     0|                mask[(2 * i) * mask_stride + (2 * j + 1)] +
  79|     0|                mask[(2 * i + 1) * mask_stride + (2 * j + 1)],
  80|     0|            2);
  81|     0|        res = ((m * (int32_t)src0[i * src0_stride + j] +
  82|     0|                (AOM_BLEND_A64_MAX_ALPHA - m) *
  83|     0|                    (int32_t)src1[i * src1_stride + j]) >>
  84|     0|               AOM_BLEND_A64_ROUND_BITS);
  85|     0|        res -= round_offset;
  86|     0|        dst[i * dst_stride + j] =
  87|     0|            clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
  88|     0|      }
  89|     0|    }
  90|     0|  } else if (subw == 1 && subh == 0) {
  91|     0|    for (i = 0; i < h; ++i) {
  92|     0|      for (j = 0; j < w; ++j) {
  93|     0|        int32_t res;
  94|     0|        const int m = AOM_BLEND_AVG(mask[i * mask_stride + (2 * j)],
  95|     0|                                    mask[i * mask_stride + (2 * j + 1)]);
  96|     0|        res = ((m * (int32_t)src0[i * src0_stride + j] +
  97|     0|                (AOM_BLEND_A64_MAX_ALPHA - m) *
  98|     0|                    (int32_t)src1[i * src1_stride + j]) >>
  99|     0|               AOM_BLEND_A64_ROUND_BITS);
 100|     0|        res -= round_offset;
 101|     0|        dst[i * dst_stride + j] =
 102|     0|            clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
 103|     0|      }
 104|     0|    }
 105|     0|  } else {
 106|     0|    for (i = 0; i < h; ++i) {
 107|     0|      for (j = 0; j < w; ++j) {
 108|     0|        int32_t res;
 109|     0|        const int m = AOM_BLEND_AVG(mask[(2 * i) * mask_stride + j],
 110|     0|                                    mask[(2 * i + 1) * mask_stride + j]);
 111|     0|        res = ((int32_t)(m * (int32_t)src0[i * src0_stride + j] +
 112|     0|                         (AOM_BLEND_A64_MAX_ALPHA - m) *
 113|     0|                             (int32_t)src1[i * src1_stride + j]) >>
 114|     0|               AOM_BLEND_A64_ROUND_BITS);
 115|     0|        res -= round_offset;
 116|     0|        dst[i * dst_stride + j] =
 117|     0|            clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
 118|     0|      }
 119|     0|    }
 120|     0|  }
 121|     0|}
 122|      |
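
For orientation, here is the rounding arithmetic of aom_lowbd_blend_a64_d16_mask_c() worked through once, taking FILTER_BITS = 7 and, purely for illustration, a conv_params with round_0 = 3 and round_1 = 7 (these two values are an assumption for the example, not something recorded in this report):

  offset_bits  = 8 + 2 * 7 - 3                          = 19
  round_offset = (1 << (19 - 7)) + (1 << (19 - 7 - 1))  = 4096 + 2048 = 6144
  round_bits   = 2 * 7 - 3 - 7                          = 4

So each blended intermediate has 6144 subtracted and is then rounded down by 4 bits (the single ROUND_POWER_OF_TWO) before clip_pixel() maps it into [0, 255].
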
 123|      |#if CONFIG_AV1_HIGHBITDEPTH
 124|      |void aom_highbd_blend_a64_d16_mask_c(
 125|      |    uint8_t *dst_8, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
 126|      |    uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
 127|      |    const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh,
 128|   122|    ConvolveParams *conv_params, const int bd) {
 129|   122|  const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
 130|   122|  const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
 131|   122|                           (1 << (offset_bits - conv_params->round_1 - 1));
 132|   122|  const int round_bits =
 133|   122|      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
 134|   122|  uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8);
 135|      |
 136|   122|  assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
 137|   122|  assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
 138|      |
 139|   122|  assert(h >= 1);
 140|   122|  assert(w >= 1);
 141|   122|  assert(IS_POWER_OF_TWO(h));
 142|   122|  assert(IS_POWER_OF_TWO(w));
 143|      |
 144|      |  // excerpt from clip_pixel_highbd()
 145|      |  // set saturation_value to (1 << bd) - 1
 146|   122|  unsigned int saturation_value;
 147|   122|  switch (bd) {
 148|     0|    case 8:
 149|     0|    default: saturation_value = 255; break;
 150|    56|    case 10: saturation_value = 1023; break;
 151|    66|    case 12: saturation_value = 4095; break;
 152|   122|  }
 153|      |
 154|   122|  if (subw == 0 && subh == 0) {
 155|     0|    for (int i = 0; i < h; ++i) {
 156|     0|      for (int j = 0; j < w; ++j) {
 157|     0|        int32_t res;
 158|     0|        const int m = mask[j];
 159|     0|        res = ((m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >>
 160|     0|               AOM_BLEND_A64_ROUND_BITS);
 161|     0|        res -= round_offset;
 162|     0|        unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
 163|     0|        dst[j] = AOMMIN(v, saturation_value);
 164|     0|      }
 165|     0|      mask += mask_stride;
 166|     0|      src0 += src0_stride;
 167|     0|      src1 += src1_stride;
 168|     0|      dst += dst_stride;
 169|     0|    }
 170|   122|  } else if (subw == 1 && subh == 1) {
 171|     0|    for (int i = 0; i < h; ++i) {
 172|     0|      for (int j = 0; j < w; ++j) {
 173|     0|        int32_t res;
 174|     0|        const int m = ROUND_POWER_OF_TWO(
 175|     0|            mask[2 * j] + mask[mask_stride + 2 * j] + mask[2 * j + 1] +
 176|     0|                mask[mask_stride + 2 * j + 1],
 177|     0|            2);
 178|     0|        res = (m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >>
 179|     0|              AOM_BLEND_A64_ROUND_BITS;
 180|     0|        res -= round_offset;
 181|     0|        unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
 182|     0|        dst[j] = AOMMIN(v, saturation_value);
 183|     0|      }
 184|     0|      mask += 2 * mask_stride;
 185|     0|      src0 += src0_stride;
 186|     0|      src1 += src1_stride;
 187|     0|      dst += dst_stride;
 188|     0|    }
 189|   122|  } else if (subw == 1 && subh == 0) {
 190| 2.10k|    for (int i = 0; i < h; ++i) {
 191| 56.8k|      for (int j = 0; j < w; ++j) {
 192| 54.9k|        int32_t res;
 193| 54.9k|        const int m = AOM_BLEND_AVG(mask[2 * j], mask[2 * j + 1]);
 194| 54.9k|        res = (m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >>
 195| 54.9k|              AOM_BLEND_A64_ROUND_BITS;
 196| 54.9k|        res -= round_offset;
 197| 54.9k|        unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
 198| 54.9k|        dst[j] = AOMMIN(v, saturation_value);
 199| 54.9k|      }
 200| 1.98k|      mask += mask_stride;
 201| 1.98k|      src0 += src0_stride;
 202| 1.98k|      src1 += src1_stride;
 203| 1.98k|      dst += dst_stride;
 204| 1.98k|    }
 205|   122|  } else {
 206|     0|    for (int i = 0; i < h; ++i) {
 207|     0|      for (int j = 0; j < w; ++j) {
 208|     0|        int32_t res;
 209|     0|        const int m = AOM_BLEND_AVG(mask[j], mask[mask_stride + j]);
 210|     0|        res = (m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >>
 211|     0|              AOM_BLEND_A64_ROUND_BITS;
 212|     0|        res -= round_offset;
 213|     0|        unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
 214|     0|        dst[j] = AOMMIN(v, saturation_value);
 215|     0|      }
 216|     0|      mask += 2 * mask_stride;
 217|     0|      src0 += src0_stride;
 218|     0|      src1 += src1_stride;
 219|     0|      dst += dst_stride;
 220|     0|    }
 221|     0|  }
 222|   122|}
 223|      |#endif  // CONFIG_AV1_HIGHBITDEPTH
 224|      |
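
The four subw/subh branches above differ only in how the mask is collapsed to the blend resolution before the same blend-and-round step. A small numeric sketch, assuming AOM_BLEND_AVG(v0, v1) expands to ROUND_POWER_OF_TWO((v0) + (v1), 1) (an assumption based on its use here, not copied from blend.h):

  /* subw == 1, subh == 0: average two horizontal neighbours */
  AOM_BLEND_AVG(33, 40)                     /* (33 + 40 + 1) >> 1 = 37 */

  /* subw == 1, subh == 1: average a 2x2 block of mask samples */
  ROUND_POWER_OF_TWO(33 + 40 + 35 + 38, 2)  /* (146 + 2) >> 2 = 37 */
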
 225|      |// Blending with alpha mask. Mask values come from the range [0, 64],
 226|      |// as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can
 227|      |// be the same as dst, or dst can be different from both sources.
 228|      |
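
Concretely, the d16 loops above spell the blend out as (m * src0 + (64 - m) * src1) >> 6 with rounding; the AOM_BLEND_A64 macro used below packages the same expression (assuming it expands to ROUND_POWER_OF_TWO(m * v0 + (AOM_BLEND_A64_MAX_ALPHA - m) * v1, AOM_BLEND_A64_ROUND_BITS)):

  /* m = 16, src0 = 200, src1 = 100 */
  (16 * 200 + (64 - 16) * 100 + 32) >> 6   /* = (3200 + 4800 + 32) >> 6 = 125 */
  /* m = 64 returns src0 unchanged; m = 0 returns src1 unchanged. */
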
 229|      |void aom_blend_a64_mask_c(uint8_t *dst, uint32_t dst_stride,
 230|      |                          const uint8_t *src0, uint32_t src0_stride,
 231|      |                          const uint8_t *src1, uint32_t src1_stride,
 232|      |                          const uint8_t *mask, uint32_t mask_stride, int w,
 233|  311k|                          int h, int subw, int subh) {
 234|  311k|  int i, j;
 235|      |
 236|  311k|  assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
 237|  311k|  assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
 238|      |
 239|  311k|  assert(h >= 1);
 240|  311k|  assert(w >= 1);
 241|  311k|  assert(IS_POWER_OF_TWO(h));
 242|  311k|  assert(IS_POWER_OF_TWO(w));
 243|      |
 244|  311k|  if (subw == 0 && subh == 0) {
 245| 1.99M|    for (i = 0; i < h; ++i) {
 246| 5.05M|      for (j = 0; j < w; ++j) {
 247| 3.37M|        const int m = mask[i * mask_stride + j];
 248| 3.37M|        dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
 249| 3.37M|                                                src1[i * src1_stride + j]);
 250| 3.37M|      }
 251| 1.68M|    }
 252| 18.4E|  } else if (subw == 1 && subh == 1) {
 253|     0|    for (i = 0; i < h; ++i) {
 254|     0|      for (j = 0; j < w; ++j) {
 255|     0|        const int m = ROUND_POWER_OF_TWO(
 256|     0|            mask[(2 * i) * mask_stride + (2 * j)] +
 257|     0|                mask[(2 * i + 1) * mask_stride + (2 * j)] +
 258|     0|                mask[(2 * i) * mask_stride + (2 * j + 1)] +
 259|     0|                mask[(2 * i + 1) * mask_stride + (2 * j + 1)],
 260|     0|            2);
 261|     0|        dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
 262|     0|                                                src1[i * src1_stride + j]);
 263|     0|      }
 264|     0|    }
 265| 18.4E|  } else if (subw == 1 && subh == 0) {
 266|     0|    for (i = 0; i < h; ++i) {
 267|     0|      for (j = 0; j < w; ++j) {
 268|     0|        const int m = AOM_BLEND_AVG(mask[i * mask_stride + (2 * j)],
 269|     0|                                    mask[i * mask_stride + (2 * j + 1)]);
 270|     0|        dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
 271|     0|                                                src1[i * src1_stride + j]);
 272|     0|      }
 273|     0|    }
 274| 18.4E|  } else {
 275| 18.4E|    for (i = 0; i < h; ++i) {
 276|     0|      for (j = 0; j < w; ++j) {
 277|     0|        const int m = AOM_BLEND_AVG(mask[(2 * i) * mask_stride + j],
 278|     0|                                    mask[(2 * i + 1) * mask_stride + j]);
 279|     0|        dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
 280|     0|                                                src1[i * src1_stride + j]);
 281|     0|      }
 282|     0|    }
 283| 18.4E|  }
 284|  311k|}
 285|      |
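
A hypothetical caller sketch for aom_blend_a64_mask_c() (buffer sizes and values chosen only for illustration): with subw = subh = 1 the mask is read at twice the block resolution, so a 4x4 blend consumes an 8x8 mask.

  #include <stdint.h>
  #include <string.h>

  /* Prototype as defined in this file; normally it comes via config/aom_dsp_rtcd.h. */
  void aom_blend_a64_mask_c(uint8_t *dst, uint32_t dst_stride,
                            const uint8_t *src0, uint32_t src0_stride,
                            const uint8_t *src1, uint32_t src1_stride,
                            const uint8_t *mask, uint32_t mask_stride, int w,
                            int h, int subw, int subh);

  static void blend_4x4_example(void) {
    uint8_t dst[4 * 4], src0[4 * 4], src1[4 * 4];
    uint8_t mask[8 * 8]; /* mask values must stay within [0, 64] */
    memset(src0, 200, sizeof(src0));
    memset(src1, 100, sizeof(src1));
    memset(mask, 16, sizeof(mask)); /* uniform quarter weight toward src0 */
    aom_blend_a64_mask_c(dst, 4, src0, 4, src1, 4, mask, 8,
                         /*w=*/4, /*h=*/4, /*subw=*/1, /*subh=*/1);
    /* every dst pixel is now (16 * 200 + 48 * 100 + 32) >> 6 = 125 */
  }
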
 286|      |#if CONFIG_AV1_HIGHBITDEPTH
 287|      |void aom_highbd_blend_a64_mask_c(uint8_t *dst_8, uint32_t dst_stride,
 288|      |                                 const uint8_t *src0_8, uint32_t src0_stride,
 289|      |                                 const uint8_t *src1_8, uint32_t src1_stride,
 290|      |                                 const uint8_t *mask, uint32_t mask_stride,
 291|  213k|                                 int w, int h, int subw, int subh, int bd) {
 292|  213k|  int i, j;
 293|  213k|  uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8);
 294|  213k|  const uint16_t *src0 = CONVERT_TO_SHORTPTR(src0_8);
 295|  213k|  const uint16_t *src1 = CONVERT_TO_SHORTPTR(src1_8);
 296|  213k|  (void)bd;
 297|      |
 298|  213k|  assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
 299|  213k|  assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
 300|      |
 301|  213k|  assert(h >= 1);
 302|  213k|  assert(w >= 1);
 303|  213k|  assert(IS_POWER_OF_TWO(h));
 304|  213k|  assert(IS_POWER_OF_TWO(w));
 305|      |
 306|  213k|  assert(bd == 8 || bd == 10 || bd == 12);
 307|      |
 308|  213k|  if (subw == 0 && subh == 0) {
 309| 1.31M|    for (i = 0; i < h; ++i) {
 310| 3.31M|      for (j = 0; j < w; ++j) {
 311| 2.21M|        const int m = mask[i * mask_stride + j];
 312| 2.21M|        dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
 313| 2.21M|                                                src1[i * src1_stride + j]);
 314| 2.21M|      }
 315| 1.10M|    }
 316|  213k|  } else if (subw == 1 && subh == 1) {
 317|     0|    for (i = 0; i < h; ++i) {
 318|     0|      for (j = 0; j < w; ++j) {
 319|     0|        const int m = ROUND_POWER_OF_TWO(
 320|     0|            mask[(2 * i) * mask_stride + (2 * j)] +
 321|     0|                mask[(2 * i + 1) * mask_stride + (2 * j)] +
 322|     0|                mask[(2 * i) * mask_stride + (2 * j + 1)] +
 323|     0|                mask[(2 * i + 1) * mask_stride + (2 * j + 1)],
 324|     0|            2);
 325|     0|        dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
 326|     0|                                                src1[i * src1_stride + j]);
 327|     0|      }
 328|     0|    }
 329|     0|  } else if (subw == 1 && subh == 0) {
 330|     0|    for (i = 0; i < h; ++i) {
 331|     0|      for (j = 0; j < w; ++j) {
 332|     0|        const int m = AOM_BLEND_AVG(mask[i * mask_stride + (2 * j)],
 333|     0|                                    mask[i * mask_stride + (2 * j + 1)]);
 334|     0|        dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
 335|     0|                                                src1[i * src1_stride + j]);
 336|     0|      }
 337|     0|    }
 338|     0|  } else {
 339|     0|    for (i = 0; i < h; ++i) {
 340|     0|      for (j = 0; j < w; ++j) {
 341|     0|        const int m = AOM_BLEND_AVG(mask[(2 * i) * mask_stride + j],
 342|     0|                                    mask[(2 * i + 1) * mask_stride + j]);
 343|     0|        dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
 344|     0|                                                src1[i * src1_stride + j]);
 345|     0|      }
 346|     0|    }
 347|     0|  }
 348|  213k|}
 349|      |#endif  // CONFIG_AV1_HIGHBITDEPTH
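
Both high-bitdepth functions take uint8_t * sample arguments and immediately convert them with CONVERT_TO_SHORTPTR(); callers are expected to hold samples in uint16_t buffers and wrap the pointers with the matching CONVERT_TO_BYTEPTR() macro (the usual libaom convention from aom_ports/mem.h, stated here as an assumption since the definition is not part of this file). A hypothetical sketch of that convention, with illustrative values:

  #include <stdint.h>
  #include "aom_ports/mem.h" /* assumed home of CONVERT_TO_BYTEPTR() */

  /* Prototype as defined in this file; normally it comes via config/aom_dsp_rtcd.h. */
  void aom_highbd_blend_a64_mask_c(uint8_t *dst_8, uint32_t dst_stride,
                                   const uint8_t *src0_8, uint32_t src0_stride,
                                   const uint8_t *src1_8, uint32_t src1_stride,
                                   const uint8_t *mask, uint32_t mask_stride,
                                   int w, int h, int subw, int subh, int bd);

  static void highbd_blend_example(void) {
    uint16_t dst[4 * 4], src0[4 * 4], src1[4 * 4];
    uint8_t mask[4 * 4];
    for (int i = 0; i < 16; ++i) {
      src0[i] = 800;
      src1[i] = 200;
      mask[i] = 32; /* equal weight */
    }
    aom_highbd_blend_a64_mask_c(CONVERT_TO_BYTEPTR(dst), 4,
                                CONVERT_TO_BYTEPTR(src0), 4,
                                CONVERT_TO_BYTEPTR(src1), 4, mask, 4,
                                /*w=*/4, /*h=*/4, /*subw=*/0, /*subh=*/0,
                                /*bd=*/10);
    /* every dst sample is now (32 * 800 + 32 * 200 + 32) >> 6 = 500 */
  }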