Coverage Report

Created: 2026-06-14 06:57

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/aom/aom_dsp/blend_a64_mask.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <assert.h>
13
14
#include "aom/aom_integer.h"
15
#include "aom_ports/mem.h"
16
#include "aom_dsp/blend.h"
17
#include "aom_dsp/aom_dsp_common.h"
18
19
#include "config/aom_dsp_rtcd.h"
20
21
// Blending with alpha mask. Mask values come from the range [0, 64],
22
// as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can
23
// be the same as dst, or dst can be different from both sources.
24
25
// NOTE(rachelbarker): The input and output of aom_blend_a64_d16_mask_c() are
26
// in a higher intermediate precision, and will later be rounded down to pixel
27
// precision.
28
// Thus, in order to avoid double-rounding, we want to use normal right shifts
29
// within this function, not ROUND_POWER_OF_TWO.
30
// This works because of the identity:
31
// ROUND_POWER_OF_TWO(x >> y, z) == ROUND_POWER_OF_TWO(x, y+z)
32
//
33
// In contrast, the output of the non-d16 functions will not be further rounded,
34
// so we *should* use ROUND_POWER_OF_TWO there.
35
36
void aom_lowbd_blend_a64_d16_mask_c(
37
    uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
38
    uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
39
    const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh,
40
1.31k
    ConvolveParams *conv_params) {
41
1.31k
  int i, j;
42
1.31k
  const int bd = 8;
43
1.31k
  const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
44
1.31k
  const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
45
1.31k
                           (1 << (offset_bits - conv_params->round_1 - 1));
46
1.31k
  const int round_bits =
47
1.31k
      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
48
49
1.31k
  assert(IMPLIES((void *)src0 == dst, src0_stride == dst_stride));
50
1.31k
  assert(IMPLIES((void *)src1 == dst, src1_stride == dst_stride));
51
52
1.31k
  assert(h >= 4);
53
1.31k
  assert(w >= 4);
54
1.31k
  assert(IS_POWER_OF_TWO(h));
55
1.31k
  assert(IS_POWER_OF_TWO(w));
56
57
1.31k
  if (subw == 0 && subh == 0) {
58
18.2k
    for (i = 0; i < h; ++i) {
59
166k
      for (j = 0; j < w; ++j) {
60
149k
        int32_t res;
61
149k
        const int m = mask[i * mask_stride + j];
62
149k
        res = ((m * (int32_t)src0[i * src0_stride + j] +
63
149k
                (AOM_BLEND_A64_MAX_ALPHA - m) *
64
149k
                    (int32_t)src1[i * src1_stride + j]) >>
65
149k
               AOM_BLEND_A64_ROUND_BITS);
66
149k
        res -= round_offset;
67
149k
        dst[i * dst_stride + j] =
68
149k
            clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
69
149k
      }
70
17.1k
    }
71
1.11k
  } else if (subw == 1 && subh == 1) {
72
230
    for (i = 0; i < h; ++i) {
73
920
      for (j = 0; j < w; ++j) {
74
736
        int32_t res;
75
736
        const int m = ROUND_POWER_OF_TWO(
76
736
            mask[(2 * i) * mask_stride + (2 * j)] +
77
736
                mask[(2 * i + 1) * mask_stride + (2 * j)] +
78
736
                mask[(2 * i) * mask_stride + (2 * j + 1)] +
79
736
                mask[(2 * i + 1) * mask_stride + (2 * j + 1)],
80
736
            2);
81
736
        res = ((m * (int32_t)src0[i * src0_stride + j] +
82
736
                (AOM_BLEND_A64_MAX_ALPHA - m) *
83
736
                    (int32_t)src1[i * src1_stride + j]) >>
84
736
               AOM_BLEND_A64_ROUND_BITS);
85
736
        res -= round_offset;
86
736
        dst[i * dst_stride + j] =
87
736
            clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
88
736
      }
89
184
    }
90
152
  } else if (subw == 1 && subh == 0) {
91
1.62k
    for (i = 0; i < h; ++i) {
92
19.8k
      for (j = 0; j < w; ++j) {
93
18.3k
        int32_t res;
94
18.3k
        const int m = AOM_BLEND_AVG(mask[i * mask_stride + (2 * j)],
95
18.3k
                                    mask[i * mask_stride + (2 * j + 1)]);
96
18.3k
        res = ((m * (int32_t)src0[i * src0_stride + j] +
97
18.3k
                (AOM_BLEND_A64_MAX_ALPHA - m) *
98
18.3k
                    (int32_t)src1[i * src1_stride + j]) >>
99
18.3k
               AOM_BLEND_A64_ROUND_BITS);
100
18.3k
        res -= round_offset;
101
18.3k
        dst[i * dst_stride + j] =
102
18.3k
            clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
103
18.3k
      }
104
1.47k
    }
105
152
  } else {
106
0
    for (i = 0; i < h; ++i) {
107
0
      for (j = 0; j < w; ++j) {
108
0
        int32_t res;
109
0
        const int m = AOM_BLEND_AVG(mask[(2 * i) * mask_stride + j],
110
0
                                    mask[(2 * i + 1) * mask_stride + j]);
111
0
        res = ((int32_t)(m * (int32_t)src0[i * src0_stride + j] +
112
0
                         (AOM_BLEND_A64_MAX_ALPHA - m) *
113
0
                             (int32_t)src1[i * src1_stride + j]) >>
114
0
               AOM_BLEND_A64_ROUND_BITS);
115
0
        res -= round_offset;
116
0
        dst[i * dst_stride + j] =
117
0
            clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
118
0
      }
119
0
    }
120
0
  }
121
1.31k
}
122
123
#if CONFIG_AV1_HIGHBITDEPTH
124
void aom_highbd_blend_a64_d16_mask_c(
125
    uint8_t *dst_8, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
126
    uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
127
    const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh,
128
2.24k
    ConvolveParams *conv_params, const int bd) {
129
2.24k
  const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
130
2.24k
  const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
131
2.24k
                           (1 << (offset_bits - conv_params->round_1 - 1));
132
2.24k
  const int round_bits =
133
2.24k
      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
134
2.24k
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8);
135
136
2.24k
  assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
137
2.24k
  assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
138
139
2.24k
  assert(h >= 1);
140
2.24k
  assert(w >= 1);
141
2.24k
  assert(IS_POWER_OF_TWO(h));
142
2.24k
  assert(IS_POWER_OF_TWO(w));
143
144
  // excerpt from clip_pixel_highbd()
145
  // set saturation_value to (1 << bd) - 1
146
2.24k
  unsigned int saturation_value;
147
2.24k
  switch (bd) {
148
0
    case 8:
149
0
    default: saturation_value = 255; break;
150
2.21k
    case 10: saturation_value = 1023; break;
151
27
    case 12: saturation_value = 4095; break;
152
2.24k
  }
153
154
2.24k
  if (subw == 0 && subh == 0) {
155
23.0k
    for (int i = 0; i < h; ++i) {
156
250k
      for (int j = 0; j < w; ++j) {
157
228k
        int32_t res;
158
228k
        const int m = mask[j];
159
228k
        res = ((m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >>
160
228k
               AOM_BLEND_A64_ROUND_BITS);
161
228k
        res -= round_offset;
162
228k
        unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
163
228k
        dst[j] = AOMMIN(v, saturation_value);
164
228k
      }
165
21.5k
      mask += mask_stride;
166
21.5k
      src0 += src0_stride;
167
21.5k
      src1 += src1_stride;
168
21.5k
      dst += dst_stride;
169
21.5k
    }
170
1.51k
  } else if (subw == 1 && subh == 1) {
171
210
    for (int i = 0; i < h; ++i) {
172
840
      for (int j = 0; j < w; ++j) {
173
672
        int32_t res;
174
672
        const int m = ROUND_POWER_OF_TWO(
175
672
            mask[2 * j] + mask[mask_stride + 2 * j] + mask[2 * j + 1] +
176
672
                mask[mask_stride + 2 * j + 1],
177
672
            2);
178
672
        res = (m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >>
179
672
              AOM_BLEND_A64_ROUND_BITS;
180
672
        res -= round_offset;
181
672
        unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
182
672
        dst[j] = AOMMIN(v, saturation_value);
183
672
      }
184
168
      mask += 2 * mask_stride;
185
168
      src0 += src0_stride;
186
168
      src1 += src1_stride;
187
168
      dst += dst_stride;
188
168
    }
189
688
  } else if (subw == 1 && subh == 0) {
190
7.28k
    for (int i = 0; i < h; ++i) {
191
88.7k
      for (int j = 0; j < w; ++j) {
192
82.1k
        int32_t res;
193
82.1k
        const int m = AOM_BLEND_AVG(mask[2 * j], mask[2 * j + 1]);
194
82.1k
        res = (m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >>
195
82.1k
              AOM_BLEND_A64_ROUND_BITS;
196
82.1k
        res -= round_offset;
197
82.1k
        unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
198
82.1k
        dst[j] = AOMMIN(v, saturation_value);
199
82.1k
      }
200
6.59k
      mask += mask_stride;
201
6.59k
      src0 += src0_stride;
202
6.59k
      src1 += src1_stride;
203
6.59k
      dst += dst_stride;
204
6.59k
    }
205
688
  } else {
206
0
    for (int i = 0; i < h; ++i) {
207
0
      for (int j = 0; j < w; ++j) {
208
0
        int32_t res;
209
0
        const int m = AOM_BLEND_AVG(mask[j], mask[mask_stride + j]);
210
0
        res = (m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >>
211
0
              AOM_BLEND_A64_ROUND_BITS;
212
0
        res -= round_offset;
213
0
        unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
214
0
        dst[j] = AOMMIN(v, saturation_value);
215
0
      }
216
0
      mask += 2 * mask_stride;
217
0
      src0 += src0_stride;
218
0
      src1 += src1_stride;
219
0
      dst += dst_stride;
220
0
    }
221
0
  }
222
2.24k
}
223
#endif  // CONFIG_AV1_HIGHBITDEPTH
224
225
// Blending with alpha mask. Mask values come from the range [0, 64],
226
// as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can
227
// be the same as dst, or dst can be different from both sources.
228
229
// Masked blend of two 8-bit buffers into an 8-bit destination using
// AOM_BLEND_A64, with the mask value as the weight of src0.
//
// subw/subh select how the mask is sampled:
//   (0, 0) one mask value per pixel,
//   (1, 1) rounded mean of a 2x2 group of mask values,
//   (1, 0) rounded mean of two horizontal neighbours,
//   anything else: rounded mean of two vertical neighbours.
// w and h are asserted to be powers of two and at least 1.
void aom_blend_a64_mask_c(uint8_t *dst, uint32_t dst_stride,
                          const uint8_t *src0, uint32_t src0_stride,
                          const uint8_t *src1, uint32_t src1_stride,
                          const uint8_t *mask, uint32_t mask_stride, int w,
                          int h, int subw, int subh) {
  // In-place operation is only valid when the aliased source shares the
  // destination stride.
  assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
  assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

  assert(h >= 1);
  assert(w >= 1);
  assert(IS_POWER_OF_TWO(h));
  assert(IS_POWER_OF_TWO(w));

  if (subw == 0 && subh == 0) {
    // Mask has one entry per output pixel.
    for (int y = 0; y < h; ++y) {
      const uint8_t *const mask_row = mask + y * mask_stride;
      const uint8_t *const src0_row = src0 + y * src0_stride;
      const uint8_t *const src1_row = src1 + y * src1_stride;
      uint8_t *const dst_row = dst + y * dst_stride;
      for (int x = 0; x < w; ++x) {
        const int alpha = mask_row[x];
        dst_row[x] = AOM_BLEND_A64(alpha, src0_row[x], src1_row[x]);
      }
    }
  } else if (subw == 1 && subh == 1) {
    // Mask is read at twice the resolution in both directions.
    for (int y = 0; y < h; ++y) {
      const uint8_t *const mask_top = mask + (2 * y) * mask_stride;
      const uint8_t *const mask_bot = mask + (2 * y + 1) * mask_stride;
      const uint8_t *const src0_row = src0 + y * src0_stride;
      const uint8_t *const src1_row = src1 + y * src1_stride;
      uint8_t *const dst_row = dst + y * dst_stride;
      for (int x = 0; x < w; ++x) {
        const int alpha = ROUND_POWER_OF_TWO(
            mask_top[2 * x] + mask_bot[2 * x] + mask_top[2 * x + 1] +
                mask_bot[2 * x + 1],
            2);
        dst_row[x] = AOM_BLEND_A64(alpha, src0_row[x], src1_row[x]);
      }
    }
  } else if (subw == 1 && subh == 0) {
    // Mask is read at twice the horizontal resolution only.
    for (int y = 0; y < h; ++y) {
      const uint8_t *const mask_row = mask + y * mask_stride;
      const uint8_t *const src0_row = src0 + y * src0_stride;
      const uint8_t *const src1_row = src1 + y * src1_stride;
      uint8_t *const dst_row = dst + y * dst_stride;
      for (int x = 0; x < w; ++x) {
        const int alpha = AOM_BLEND_AVG(mask_row[2 * x], mask_row[2 * x + 1]);
        dst_row[x] = AOM_BLEND_A64(alpha, src0_row[x], src1_row[x]);
      }
    }
  } else {
    // Remaining case: mask is read at twice the vertical resolution only.
    for (int y = 0; y < h; ++y) {
      const uint8_t *const mask_top = mask + (2 * y) * mask_stride;
      const uint8_t *const mask_bot = mask + (2 * y + 1) * mask_stride;
      const uint8_t *const src0_row = src0 + y * src0_stride;
      const uint8_t *const src1_row = src1 + y * src1_stride;
      uint8_t *const dst_row = dst + y * dst_stride;
      for (int x = 0; x < w; ++x) {
        const int alpha = AOM_BLEND_AVG(mask_top[x], mask_bot[x]);
        dst_row[x] = AOM_BLEND_A64(alpha, src0_row[x], src1_row[x]);
      }
    }
  }
}
285
286
#if CONFIG_AV1_HIGHBITDEPTH
287
// Masked blend of two 16-bit buffers into a 16-bit destination using
// AOM_BLEND_A64, with the mask value as the weight of src0.
//
// dst_8, src0_8 and src1_8 are converted with CONVERT_TO_SHORTPTR before use.
// subw/subh select how the mask is sampled:
//   (0, 0) one mask value per pixel,
//   (1, 1) rounded mean of a 2x2 group of mask values,
//   (1, 0) rounded mean of two horizontal neighbours,
//   anything else: rounded mean of two vertical neighbours.
// bd is only checked by an assert (8, 10 or 12); it does not affect the
// arithmetic.
void aom_highbd_blend_a64_mask_c(uint8_t *dst_8, uint32_t dst_stride,
                                 const uint8_t *src0_8, uint32_t src0_stride,
                                 const uint8_t *src1_8, uint32_t src1_stride,
                                 const uint8_t *mask, uint32_t mask_stride,
                                 int w, int h, int subw, int subh, int bd) {
  uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst_8);
  const uint16_t *const src0_16 = CONVERT_TO_SHORTPTR(src0_8);
  const uint16_t *const src1_16 = CONVERT_TO_SHORTPTR(src1_8);
  (void)bd;  // Only referenced by the assert below.

  // In-place operation is only valid when the aliased source shares the
  // destination stride.
  assert(IMPLIES(src0_16 == dst16, src0_stride == dst_stride));
  assert(IMPLIES(src1_16 == dst16, src1_stride == dst_stride));

  assert(h >= 1);
  assert(w >= 1);
  assert(IS_POWER_OF_TWO(h));
  assert(IS_POWER_OF_TWO(w));

  assert(bd == 8 || bd == 10 || bd == 12);

  if (subw == 0 && subh == 0) {
    // Mask has one entry per output pixel.
    for (int y = 0; y < h; ++y) {
      const uint8_t *const mask_row = mask + y * mask_stride;
      const uint16_t *const src0_row = src0_16 + y * src0_stride;
      const uint16_t *const src1_row = src1_16 + y * src1_stride;
      uint16_t *const dst_row = dst16 + y * dst_stride;
      for (int x = 0; x < w; ++x) {
        const int alpha = mask_row[x];
        dst_row[x] = AOM_BLEND_A64(alpha, src0_row[x], src1_row[x]);
      }
    }
  } else if (subw == 1 && subh == 1) {
    // Mask is read at twice the resolution in both directions.
    for (int y = 0; y < h; ++y) {
      const uint8_t *const mask_top = mask + (2 * y) * mask_stride;
      const uint8_t *const mask_bot = mask + (2 * y + 1) * mask_stride;
      const uint16_t *const src0_row = src0_16 + y * src0_stride;
      const uint16_t *const src1_row = src1_16 + y * src1_stride;
      uint16_t *const dst_row = dst16 + y * dst_stride;
      for (int x = 0; x < w; ++x) {
        const int alpha = ROUND_POWER_OF_TWO(
            mask_top[2 * x] + mask_bot[2 * x] + mask_top[2 * x + 1] +
                mask_bot[2 * x + 1],
            2);
        dst_row[x] = AOM_BLEND_A64(alpha, src0_row[x], src1_row[x]);
      }
    }
  } else if (subw == 1 && subh == 0) {
    // Mask is read at twice the horizontal resolution only.
    for (int y = 0; y < h; ++y) {
      const uint8_t *const mask_row = mask + y * mask_stride;
      const uint16_t *const src0_row = src0_16 + y * src0_stride;
      const uint16_t *const src1_row = src1_16 + y * src1_stride;
      uint16_t *const dst_row = dst16 + y * dst_stride;
      for (int x = 0; x < w; ++x) {
        const int alpha = AOM_BLEND_AVG(mask_row[2 * x], mask_row[2 * x + 1]);
        dst_row[x] = AOM_BLEND_A64(alpha, src0_row[x], src1_row[x]);
      }
    }
  } else {
    // Remaining case: mask is read at twice the vertical resolution only.
    for (int y = 0; y < h; ++y) {
      const uint8_t *const mask_top = mask + (2 * y) * mask_stride;
      const uint8_t *const mask_bot = mask + (2 * y + 1) * mask_stride;
      const uint16_t *const src0_row = src0_16 + y * src0_stride;
      const uint16_t *const src1_row = src1_16 + y * src1_stride;
      uint16_t *const dst_row = dst16 + y * dst_stride;
      for (int x = 0; x < w; ++x) {
        const int alpha = AOM_BLEND_AVG(mask_top[x], mask_bot[x]);
        dst_row[x] = AOM_BLEND_A64(alpha, src0_row[x], src1_row[x]);
      }
    }
  }
}
349
#endif  // CONFIG_AV1_HIGHBITDEPTH