Coverage Report

Created: 2025-06-22 08:04

/src/aom/aom_dsp/blend_a64_mask.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <assert.h>
13
14
#include "aom/aom_integer.h"
15
#include "aom_ports/mem.h"
16
#include "aom_dsp/blend.h"
17
#include "aom_dsp/aom_dsp_common.h"
18
19
#include "config/aom_dsp_rtcd.h"
20
21
// Blending with alpha mask. Mask values come from the range [0, 64],
22
// as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can
23
// be the same as dst, or dst can be different from both sources.
24
25
// NOTE(rachelbarker): The input and output of aom_blend_a64_d16_mask_c() are
26
// in a higher intermediate precision, and will later be rounded down to pixel
27
// precision.
28
// Thus, in order to avoid double-rounding, we want to use normal right shifts
29
// within this function, not ROUND_POWER_OF_TWO.
30
// This works because of the identity:
31
// ROUND_POWER_OF_TWO(x >> y, z) == ROUND_POWER_OF_TWO(x, y+z)
32
//
33
// In contrast, the output of the non-d16 functions will not be further rounded,
34
// so we *should* use ROUND_POWER_OF_TWO there.
35
36
void aom_lowbd_blend_a64_d16_mask_c(
37
    uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
38
    uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
39
    const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh,
40
1.73k
    ConvolveParams *conv_params) {
41
1.73k
  int i, j;
42
1.73k
  const int bd = 8;
43
1.73k
  const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
44
1.73k
  const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
45
1.73k
                           (1 << (offset_bits - conv_params->round_1 - 1));
46
1.73k
  const int round_bits =
47
1.73k
      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
48
49
1.73k
  assert(IMPLIES((void *)src0 == dst, src0_stride == dst_stride));
50
1.73k
  assert(IMPLIES((void *)src1 == dst, src1_stride == dst_stride));
51
52
1.73k
  assert(h >= 4);
53
1.73k
  assert(w >= 4);
54
1.73k
  assert(IS_POWER_OF_TWO(h));
55
1.73k
  assert(IS_POWER_OF_TWO(w));
56
57
1.73k
  if (subw == 0 && subh == 0) {
58
21.5k
    for (i = 0; i < h; ++i) {
59
204k
      for (j = 0; j < w; ++j) {
60
183k
        int32_t res;
61
183k
        const int m = mask[i * mask_stride + j];
62
183k
        res = ((m * (int32_t)src0[i * src0_stride + j] +
63
183k
                (AOM_BLEND_A64_MAX_ALPHA - m) *
64
183k
                    (int32_t)src1[i * src1_stride + j]) >>
65
183k
               AOM_BLEND_A64_ROUND_BITS);
66
183k
        res -= round_offset;
67
183k
        dst[i * dst_stride + j] =
68
183k
            clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
69
183k
      }
70
20.1k
    }
71
1.38k
  } else if (subw == 1 && subh == 1) {
72
440
    for (i = 0; i < h; ++i) {
73
1.76k
      for (j = 0; j < w; ++j) {
74
1.40k
        int32_t res;
75
1.40k
        const int m = ROUND_POWER_OF_TWO(
76
1.40k
            mask[(2 * i) * mask_stride + (2 * j)] +
77
1.40k
                mask[(2 * i + 1) * mask_stride + (2 * j)] +
78
1.40k
                mask[(2 * i) * mask_stride + (2 * j + 1)] +
79
1.40k
                mask[(2 * i + 1) * mask_stride + (2 * j + 1)],
80
1.40k
            2);
81
1.40k
        res = ((m * (int32_t)src0[i * src0_stride + j] +
82
1.40k
                (AOM_BLEND_A64_MAX_ALPHA - m) *
83
1.40k
                    (int32_t)src1[i * src1_stride + j]) >>
84
1.40k
               AOM_BLEND_A64_ROUND_BITS);
85
1.40k
        res -= round_offset;
86
1.40k
        dst[i * dst_stride + j] =
87
1.40k
            clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
88
1.40k
      }
89
352
    }
90
266
  } else if (subw == 1 && subh == 0) {
91
2.68k
    for (i = 0; i < h; ++i) {
92
29.2k
      for (j = 0; j < w; ++j) {
93
26.8k
        int32_t res;
94
26.8k
        const int m = AOM_BLEND_AVG(mask[i * mask_stride + (2 * j)],
95
26.8k
                                    mask[i * mask_stride + (2 * j + 1)]);
96
26.8k
        res = ((m * (int32_t)src0[i * src0_stride + j] +
97
26.8k
                (AOM_BLEND_A64_MAX_ALPHA - m) *
98
26.8k
                    (int32_t)src1[i * src1_stride + j]) >>
99
26.8k
               AOM_BLEND_A64_ROUND_BITS);
100
26.8k
        res -= round_offset;
101
26.8k
        dst[i * dst_stride + j] =
102
26.8k
            clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
103
26.8k
      }
104
2.41k
    }
105
266
  } else {
106
0
    for (i = 0; i < h; ++i) {
107
0
      for (j = 0; j < w; ++j) {
108
0
        int32_t res;
109
0
        const int m = AOM_BLEND_AVG(mask[(2 * i) * mask_stride + j],
110
0
                                    mask[(2 * i + 1) * mask_stride + j]);
111
0
        res = ((int32_t)(m * (int32_t)src0[i * src0_stride + j] +
112
0
                         (AOM_BLEND_A64_MAX_ALPHA - m) *
113
0
                             (int32_t)src1[i * src1_stride + j]) >>
114
0
               AOM_BLEND_A64_ROUND_BITS);
115
0
        res -= round_offset;
116
0
        dst[i * dst_stride + j] =
117
0
            clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
118
0
      }
119
0
    }
120
0
  }
121
1.73k
}
122
123
#if CONFIG_AV1_HIGHBITDEPTH
124
void aom_highbd_blend_a64_d16_mask_c(
125
    uint8_t *dst_8, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
126
    uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
127
    const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh,
128
2.73k
    ConvolveParams *conv_params, const int bd) {
129
2.73k
  const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
130
2.73k
  const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
131
2.73k
                           (1 << (offset_bits - conv_params->round_1 - 1));
132
2.73k
  const int round_bits =
133
2.73k
      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
134
2.73k
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8);
135
136
2.73k
  assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
137
2.73k
  assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
138
139
2.73k
  assert(h >= 1);
140
2.73k
  assert(w >= 1);
141
2.73k
  assert(IS_POWER_OF_TWO(h));
142
2.73k
  assert(IS_POWER_OF_TWO(w));
143
144
  // excerpt from clip_pixel_highbd()
145
  // set saturation_value to (1 << bd) - 1
146
2.73k
  unsigned int saturation_value;
147
2.73k
  switch (bd) {
148
0
    case 8:
149
0
    default: saturation_value = 255; break;
150
2.70k
    case 10: saturation_value = 1023; break;
151
30
    case 12: saturation_value = 4095; break;
152
2.73k
  }
153
154
2.73k
  if (subw == 0 && subh == 0) {
155
31.1k
    for (int i = 0; i < h; ++i) {
156
319k
      for (int j = 0; j < w; ++j) {
157
290k
        int32_t res;
158
290k
        const int m = mask[j];
159
290k
        res = ((m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >>
160
290k
               AOM_BLEND_A64_ROUND_BITS);
161
290k
        res -= round_offset;
162
290k
        unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
163
290k
        dst[j] = AOMMIN(v, saturation_value);
164
290k
      }
165
29.1k
      mask += mask_stride;
166
29.1k
      src0 += src0_stride;
167
29.1k
      src1 += src1_stride;
168
29.1k
      dst += dst_stride;
169
29.1k
    }
170
1.98k
  } else if (subw == 1 && subh == 1) {
171
360
    for (int i = 0; i < h; ++i) {
172
1.44k
      for (int j = 0; j < w; ++j) {
173
1.15k
        int32_t res;
174
1.15k
        const int m = ROUND_POWER_OF_TWO(
175
1.15k
            mask[2 * j] + mask[mask_stride + 2 * j] + mask[2 * j + 1] +
176
1.15k
                mask[mask_stride + 2 * j + 1],
177
1.15k
            2);
178
1.15k
        res = (m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >>
179
1.15k
              AOM_BLEND_A64_ROUND_BITS;
180
1.15k
        res -= round_offset;
181
1.15k
        unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
182
1.15k
        dst[j] = AOMMIN(v, saturation_value);
183
1.15k
      }
184
288
      mask += 2 * mask_stride;
185
288
      src0 += src0_stride;
186
288
      src1 += src1_stride;
187
288
      dst += dst_stride;
188
288
    }
189
684
  } else if (subw == 1 && subh == 0) {
190
7.06k
    for (int i = 0; i < h; ++i) {
191
78.5k
      for (int j = 0; j < w; ++j) {
192
72.1k
        int32_t res;
193
72.1k
        const int m = AOM_BLEND_AVG(mask[2 * j], mask[2 * j + 1]);
194
72.1k
        res = (m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >>
195
72.1k
              AOM_BLEND_A64_ROUND_BITS;
196
72.1k
        res -= round_offset;
197
72.1k
        unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
198
72.1k
        dst[j] = AOMMIN(v, saturation_value);
199
72.1k
      }
200
6.38k
      mask += mask_stride;
201
6.38k
      src0 += src0_stride;
202
6.38k
      src1 += src1_stride;
203
6.38k
      dst += dst_stride;
204
6.38k
    }
205
684
  } else {
206
0
    for (int i = 0; i < h; ++i) {
207
0
      for (int j = 0; j < w; ++j) {
208
0
        int32_t res;
209
0
        const int m = AOM_BLEND_AVG(mask[j], mask[mask_stride + j]);
210
0
        res = (m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >>
211
0
              AOM_BLEND_A64_ROUND_BITS;
212
0
        res -= round_offset;
213
0
        unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
214
0
        dst[j] = AOMMIN(v, saturation_value);
215
0
      }
216
0
      mask += 2 * mask_stride;
217
0
      src0 += src0_stride;
218
0
      src1 += src1_stride;
219
0
      dst += dst_stride;
220
0
    }
221
0
  }
222
2.73k
}
223
#endif  // CONFIG_AV1_HIGHBITDEPTH
224
225
// Blending with alpha mask. Mask values come from the range [0, 64],
226
// as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can
227
// be the same as dst, or dst can be different from both sources.
228
229
// Alpha-blends two 8-bit sources into dst with AOM_BLEND_A64, using a
// per-pixel weight read from mask.
//
//   dst, dst_stride    output pixels and their row pitch
//   src0, src0_stride  first source (second argument of AOM_BLEND_A64)
//   src1, src1_stride  second source (third argument of AOM_BLEND_A64)
//   mask, mask_stride  per-pixel weights
//   w, h               block size; asserted to be powers of two and >= 1
//   subw, subh         1 when the mask holds two samples per output pixel
//                      horizontally / vertically, 0 when it holds one
//
// A source that aliases dst must use the same stride as dst (asserted).
void aom_blend_a64_mask_c(uint8_t *dst, uint32_t dst_stride,
                          const uint8_t *src0, uint32_t src0_stride,
                          const uint8_t *src1, uint32_t src1_stride,
                          const uint8_t *mask, uint32_t mask_stride, int w,
                          int h, int subw, int subh) {
  assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
  assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

  assert(h >= 1);
  assert(w >= 1);
  assert(IS_POWER_OF_TWO(h));
  assert(IS_POWER_OF_TWO(w));

  if (subw == 0 && subh == 0) {
    // One mask sample per output pixel.
    for (int row = 0; row < h; ++row) {
      const uint32_t d_off = row * dst_stride;
      const uint32_t a_off = row * src0_stride;
      const uint32_t b_off = row * src1_stride;
      const uint32_t m_off = row * mask_stride;
      for (int col = 0; col < w; ++col) {
        const int m = mask[m_off + col];
        dst[d_off + col] =
            AOM_BLEND_A64(m, src0[a_off + col], src1[b_off + col]);
      }
    }
  } else if (subw == 1 && subh == 1) {
    // Weight is the rounded mean of a 2x2 group of mask samples.
    for (int row = 0; row < h; ++row) {
      const uint32_t d_off = row * dst_stride;
      const uint32_t a_off = row * src0_stride;
      const uint32_t b_off = row * src1_stride;
      const uint32_t m_top = (2 * row) * mask_stride;
      const uint32_t m_bot = (2 * row + 1) * mask_stride;
      for (int col = 0; col < w; ++col) {
        const int m = ROUND_POWER_OF_TWO(
            mask[m_top + (2 * col)] + mask[m_bot + (2 * col)] +
                mask[m_top + (2 * col + 1)] + mask[m_bot + (2 * col + 1)],
            2);
        dst[d_off + col] =
            AOM_BLEND_A64(m, src0[a_off + col], src1[b_off + col]);
      }
    }
  } else if (subw == 1 && subh == 0) {
    // Weight is the average of two horizontally adjacent mask samples.
    for (int row = 0; row < h; ++row) {
      const uint32_t d_off = row * dst_stride;
      const uint32_t a_off = row * src0_stride;
      const uint32_t b_off = row * src1_stride;
      const uint32_t m_off = row * mask_stride;
      for (int col = 0; col < w; ++col) {
        const int m = AOM_BLEND_AVG(mask[m_off + (2 * col)],
                                    mask[m_off + (2 * col + 1)]);
        dst[d_off + col] =
            AOM_BLEND_A64(m, src0[a_off + col], src1[b_off + col]);
      }
    }
  } else {
    // Every other subw/subh combination: the weight is the average of two
    // vertically adjacent mask samples.
    for (int row = 0; row < h; ++row) {
      const uint32_t d_off = row * dst_stride;
      const uint32_t a_off = row * src0_stride;
      const uint32_t b_off = row * src1_stride;
      const uint32_t m_top = (2 * row) * mask_stride;
      const uint32_t m_bot = (2 * row + 1) * mask_stride;
      for (int col = 0; col < w; ++col) {
        const int m = AOM_BLEND_AVG(mask[m_top + col], mask[m_bot + col]);
        dst[d_off + col] =
            AOM_BLEND_A64(m, src0[a_off + col], src1[b_off + col]);
      }
    }
  }
}
285
286
#if CONFIG_AV1_HIGHBITDEPTH
287
// Alpha-blends two 16-bit sources into a 16-bit destination with
// AOM_BLEND_A64, using a per-pixel weight read from mask.
//
//   dst_8, src0_8, src1_8  buffers passed as uint8_t pointers; each is turned
//                          into a uint16_t pointer with CONVERT_TO_SHORTPTR
//   *_stride               row pitch of the matching buffer
//   mask, mask_stride      per-pixel weights
//   w, h                   block size; asserted to be powers of two and >= 1
//   subw, subh             1 when the mask holds two samples per output pixel
//                          horizontally / vertically, 0 when it holds one
//   bd                     bit depth; only checked by an assert (8, 10 or 12)
//
// A source that aliases dst must use the same stride as dst (asserted).
void aom_highbd_blend_a64_mask_c(uint8_t *dst_8, uint32_t dst_stride,
                                 const uint8_t *src0_8, uint32_t src0_stride,
                                 const uint8_t *src1_8, uint32_t src1_stride,
                                 const uint8_t *mask, uint32_t mask_stride,
                                 int w, int h, int subw, int subh, int bd) {
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8);
  const uint16_t *src0 = CONVERT_TO_SHORTPTR(src0_8);
  const uint16_t *src1 = CONVERT_TO_SHORTPTR(src1_8);
  (void)bd;  // Otherwise unused when asserts are compiled out.

  assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
  assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

  assert(h >= 1);
  assert(w >= 1);
  assert(IS_POWER_OF_TWO(h));
  assert(IS_POWER_OF_TWO(w));

  assert(bd == 8 || bd == 10 || bd == 12);

  if (subw == 0 && subh == 0) {
    // One mask sample per output pixel.
    for (int row = 0; row < h; ++row) {
      const uint32_t d_off = row * dst_stride;
      const uint32_t a_off = row * src0_stride;
      const uint32_t b_off = row * src1_stride;
      const uint32_t m_off = row * mask_stride;
      for (int col = 0; col < w; ++col) {
        const int m = mask[m_off + col];
        dst[d_off + col] =
            AOM_BLEND_A64(m, src0[a_off + col], src1[b_off + col]);
      }
    }
  } else if (subw == 1 && subh == 1) {
    // Weight is the rounded mean of a 2x2 group of mask samples.
    for (int row = 0; row < h; ++row) {
      const uint32_t d_off = row * dst_stride;
      const uint32_t a_off = row * src0_stride;
      const uint32_t b_off = row * src1_stride;
      const uint32_t m_top = (2 * row) * mask_stride;
      const uint32_t m_bot = (2 * row + 1) * mask_stride;
      for (int col = 0; col < w; ++col) {
        const int m = ROUND_POWER_OF_TWO(
            mask[m_top + (2 * col)] + mask[m_bot + (2 * col)] +
                mask[m_top + (2 * col + 1)] + mask[m_bot + (2 * col + 1)],
            2);
        dst[d_off + col] =
            AOM_BLEND_A64(m, src0[a_off + col], src1[b_off + col]);
      }
    }
  } else if (subw == 1 && subh == 0) {
    // Weight is the average of two horizontally adjacent mask samples.
    for (int row = 0; row < h; ++row) {
      const uint32_t d_off = row * dst_stride;
      const uint32_t a_off = row * src0_stride;
      const uint32_t b_off = row * src1_stride;
      const uint32_t m_off = row * mask_stride;
      for (int col = 0; col < w; ++col) {
        const int m = AOM_BLEND_AVG(mask[m_off + (2 * col)],
                                    mask[m_off + (2 * col + 1)]);
        dst[d_off + col] =
            AOM_BLEND_A64(m, src0[a_off + col], src1[b_off + col]);
      }
    }
  } else {
    // Every other subw/subh combination: the weight is the average of two
    // vertically adjacent mask samples.
    for (int row = 0; row < h; ++row) {
      const uint32_t d_off = row * dst_stride;
      const uint32_t a_off = row * src0_stride;
      const uint32_t b_off = row * src1_stride;
      const uint32_t m_top = (2 * row) * mask_stride;
      const uint32_t m_bot = (2 * row + 1) * mask_stride;
      for (int col = 0; col < w; ++col) {
        const int m = AOM_BLEND_AVG(mask[m_top + col], mask[m_bot + col]);
        dst[d_off + col] =
            AOM_BLEND_A64(m, src0[a_off + col], src1[b_off + col]);
      }
    }
  }
}
349
#endif  // CONFIG_AV1_HIGHBITDEPTH