Coverage Report

Created: 2026-01-20 07:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/aom/aom_dsp/blend_a64_mask.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <assert.h>
13
14
#include "aom/aom_integer.h"
15
#include "aom_ports/mem.h"
16
#include "aom_dsp/blend.h"
17
#include "aom_dsp/aom_dsp_common.h"
18
19
#include "config/aom_dsp_rtcd.h"
20
21
// Blending with alpha mask. Mask values come from the range [0, 64],
22
// as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can
23
// be the same as dst, or dst can be different from both sources.
24
25
// NOTE(rachelbarker): The input and output of aom_blend_a64_d16_mask_c() are
26
// in a higher intermediate precision, and will later be rounded down to pixel
27
// precision.
28
// Thus, in order to avoid double-rounding, we want to use normal right shifts
29
// within this function, not ROUND_POWER_OF_TWO.
30
// This works because of the identity:
31
// ROUND_POWER_OF_TWO(x >> y, z) == ROUND_POWER_OF_TWO(x, y+z)
32
//
33
// In contrast, the output of the non-d16 functions will not be further rounded,
34
// so we *should* use ROUND_POWER_OF_TWO there.
35
36
void aom_lowbd_blend_a64_d16_mask_c(
37
    uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
38
    uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
39
    const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh,
40
4.12k
    ConvolveParams *conv_params) {
41
4.12k
  int i, j;
42
4.12k
  const int bd = 8;
43
4.12k
  const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
44
4.12k
  const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
45
4.12k
                           (1 << (offset_bits - conv_params->round_1 - 1));
46
4.12k
  const int round_bits =
47
4.12k
      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
48
49
4.12k
  assert(IMPLIES((void *)src0 == dst, src0_stride == dst_stride));
50
4.12k
  assert(IMPLIES((void *)src1 == dst, src1_stride == dst_stride));
51
52
4.12k
  assert(h >= 4);
53
4.12k
  assert(w >= 4);
54
4.12k
  assert(IS_POWER_OF_TWO(h));
55
4.12k
  assert(IS_POWER_OF_TWO(w));
56
57
4.12k
  if (subw == 0 && subh == 0) {
58
39.2k
    for (i = 0; i < h; ++i) {
59
363k
      for (j = 0; j < w; ++j) {
60
326k
        int32_t res;
61
326k
        const int m = mask[i * mask_stride + j];
62
326k
        res = ((m * (int32_t)src0[i * src0_stride + j] +
63
326k
                (AOM_BLEND_A64_MAX_ALPHA - m) *
64
326k
                    (int32_t)src1[i * src1_stride + j]) >>
65
326k
               AOM_BLEND_A64_ROUND_BITS);
66
326k
        res -= round_offset;
67
326k
        dst[i * dst_stride + j] =
68
326k
            clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
69
326k
      }
70
36.6k
    }
71
2.64k
  } else if (subw == 1 && subh == 1) {
72
70
    for (i = 0; i < h; ++i) {
73
280
      for (j = 0; j < w; ++j) {
74
224
        int32_t res;
75
224
        const int m = ROUND_POWER_OF_TWO(
76
224
            mask[(2 * i) * mask_stride + (2 * j)] +
77
224
                mask[(2 * i + 1) * mask_stride + (2 * j)] +
78
224
                mask[(2 * i) * mask_stride + (2 * j + 1)] +
79
224
                mask[(2 * i + 1) * mask_stride + (2 * j + 1)],
80
224
            2);
81
224
        res = ((m * (int32_t)src0[i * src0_stride + j] +
82
224
                (AOM_BLEND_A64_MAX_ALPHA - m) *
83
224
                    (int32_t)src1[i * src1_stride + j]) >>
84
224
               AOM_BLEND_A64_ROUND_BITS);
85
224
        res -= round_offset;
86
224
        dst[i * dst_stride + j] =
87
224
            clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
88
224
      }
89
56
    }
90
1.46k
  } else if (subw == 1 && subh == 0) {
91
14.0k
    for (i = 0; i < h; ++i) {
92
94.2k
      for (j = 0; j < w; ++j) {
93
81.6k
        int32_t res;
94
81.6k
        const int m = AOM_BLEND_AVG(mask[i * mask_stride + (2 * j)],
95
81.6k
                                    mask[i * mask_stride + (2 * j + 1)]);
96
81.6k
        res = ((m * (int32_t)src0[i * src0_stride + j] +
97
81.6k
                (AOM_BLEND_A64_MAX_ALPHA - m) *
98
81.6k
                    (int32_t)src1[i * src1_stride + j]) >>
99
81.6k
               AOM_BLEND_A64_ROUND_BITS);
100
81.6k
        res -= round_offset;
101
81.6k
        dst[i * dst_stride + j] =
102
81.6k
            clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
103
81.6k
      }
104
12.6k
    }
105
1.46k
  } else {
106
0
    for (i = 0; i < h; ++i) {
107
0
      for (j = 0; j < w; ++j) {
108
0
        int32_t res;
109
0
        const int m = AOM_BLEND_AVG(mask[(2 * i) * mask_stride + j],
110
0
                                    mask[(2 * i + 1) * mask_stride + j]);
111
0
        res = ((int32_t)(m * (int32_t)src0[i * src0_stride + j] +
112
0
                         (AOM_BLEND_A64_MAX_ALPHA - m) *
113
0
                             (int32_t)src1[i * src1_stride + j]) >>
114
0
               AOM_BLEND_A64_ROUND_BITS);
115
0
        res -= round_offset;
116
0
        dst[i * dst_stride + j] =
117
0
            clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
118
0
      }
119
0
    }
120
0
  }
121
4.12k
}
122
123
#if CONFIG_AV1_HIGHBITDEPTH
124
void aom_highbd_blend_a64_d16_mask_c(
125
    uint8_t *dst_8, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
126
    uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
127
    const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh,
128
4.37k
    ConvolveParams *conv_params, const int bd) {
129
4.37k
  const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
130
4.37k
  const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
131
4.37k
                           (1 << (offset_bits - conv_params->round_1 - 1));
132
4.37k
  const int round_bits =
133
4.37k
      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
134
4.37k
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8);
135
136
4.37k
  assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
137
4.37k
  assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
138
139
4.37k
  assert(h >= 1);
140
4.37k
  assert(w >= 1);
141
4.37k
  assert(IS_POWER_OF_TWO(h));
142
4.37k
  assert(IS_POWER_OF_TWO(w));
143
144
  // excerpt from clip_pixel_highbd()
145
  // set saturation_value to (1 << bd) - 1
146
4.37k
  unsigned int saturation_value;
147
4.37k
  switch (bd) {
148
0
    case 8:
149
0
    default: saturation_value = 255; break;
150
4.37k
    case 10: saturation_value = 1023; break;
151
0
    case 12: saturation_value = 4095; break;
152
4.37k
  }
153
154
4.37k
  if (subw == 0 && subh == 0) {
155
25.4k
    for (int i = 0; i < h; ++i) {
156
245k
      for (int j = 0; j < w; ++j) {
157
222k
        int32_t res;
158
222k
        const int m = mask[j];
159
222k
        res = ((m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >>
160
222k
               AOM_BLEND_A64_ROUND_BITS);
161
222k
        res -= round_offset;
162
222k
        unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
163
222k
        dst[j] = AOMMIN(v, saturation_value);
164
222k
      }
165
23.4k
      mask += mask_stride;
166
23.4k
      src0 += src0_stride;
167
23.4k
      src1 += src1_stride;
168
23.4k
      dst += dst_stride;
169
23.4k
    }
170
2.36k
  } else if (subw == 1 && subh == 1) {
171
70
    for (int i = 0; i < h; ++i) {
172
280
      for (int j = 0; j < w; ++j) {
173
224
        int32_t res;
174
224
        const int m = ROUND_POWER_OF_TWO(
175
224
            mask[2 * j] + mask[mask_stride + 2 * j] + mask[2 * j + 1] +
176
224
                mask[mask_stride + 2 * j + 1],
177
224
            2);
178
224
        res = (m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >>
179
224
              AOM_BLEND_A64_ROUND_BITS;
180
224
        res -= round_offset;
181
224
        unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
182
224
        dst[j] = AOMMIN(v, saturation_value);
183
224
      }
184
56
      mask += 2 * mask_stride;
185
56
      src0 += src0_stride;
186
56
      src1 += src1_stride;
187
56
      dst += dst_stride;
188
56
    }
189
2.34k
  } else if (subw == 1 && subh == 0) {
190
23.0k
    for (int i = 0; i < h; ++i) {
191
137k
      for (int j = 0; j < w; ++j) {
192
116k
        int32_t res;
193
116k
        const int m = AOM_BLEND_AVG(mask[2 * j], mask[2 * j + 1]);
194
116k
        res = (m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >>
195
116k
              AOM_BLEND_A64_ROUND_BITS;
196
116k
        res -= round_offset;
197
116k
        unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
198
116k
        dst[j] = AOMMIN(v, saturation_value);
199
116k
      }
200
20.7k
      mask += mask_stride;
201
20.7k
      src0 += src0_stride;
202
20.7k
      src1 += src1_stride;
203
20.7k
      dst += dst_stride;
204
20.7k
    }
205
2.34k
  } else {
206
0
    for (int i = 0; i < h; ++i) {
207
0
      for (int j = 0; j < w; ++j) {
208
0
        int32_t res;
209
0
        const int m = AOM_BLEND_AVG(mask[j], mask[mask_stride + j]);
210
0
        res = (m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >>
211
0
              AOM_BLEND_A64_ROUND_BITS;
212
0
        res -= round_offset;
213
0
        unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
214
0
        dst[j] = AOMMIN(v, saturation_value);
215
0
      }
216
0
      mask += 2 * mask_stride;
217
0
      src0 += src0_stride;
218
0
      src1 += src1_stride;
219
0
      dst += dst_stride;
220
0
    }
221
0
  }
222
4.37k
}
223
#endif  // CONFIG_AV1_HIGHBITDEPTH
224
225
// Blending with alpha mask. Mask values come from the range [0, 64],
226
// as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can
227
// be the same as dst, or dst can be different from both sources.
228
229
// Blends the 8-bit buffers src0 and src1 into dst with AOM_BLEND_A64, using
// a per-pixel weight taken from mask.
//
// subw / subh select how the mask is sampled for each output pixel:
//   subw == 0, subh == 0: one mask value per pixel.
//   subw == 1, subh == 1: rounded average of a 2x2 group of mask values.
//   subw == 1, subh == 0: AOM_BLEND_AVG of two horizontally adjacent values.
//   any other combination: AOM_BLEND_AVG of two vertically adjacent values.
//
// Each output pixel is written only after the source pixels at the same
// position have been read, so dst may be the same buffer as src0 or src1
// (with matching strides, as asserted below).
void aom_blend_a64_mask_c(uint8_t *dst, uint32_t dst_stride,
                          const uint8_t *src0, uint32_t src0_stride,
                          const uint8_t *src1, uint32_t src1_stride,
                          const uint8_t *mask, uint32_t mask_stride, int w,
                          int h, int subw, int subh) {
  assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
  assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

  assert(h >= 1);
  assert(w >= 1);
  assert(IS_POWER_OF_TWO(h));
  assert(IS_POWER_OF_TWO(w));

  if (subw == 0 && subh == 0) {
    // Mask has the same resolution as the output.
    for (int row = 0; row < h; ++row) {
      const uint8_t *const mask_row = mask + row * mask_stride;
      const uint8_t *const in0 = src0 + row * src0_stride;
      const uint8_t *const in1 = src1 + row * src1_stride;
      uint8_t *const out = dst + row * dst_stride;
      for (int col = 0; col < w; ++col) {
        const int m = mask_row[col];
        out[col] = AOM_BLEND_A64(m, in0[col], in1[col]);
      }
    }
  } else if (subw == 1 && subh == 1) {
    // Mask is twice as wide and twice as tall: average each 2x2 group.
    for (int row = 0; row < h; ++row) {
      const uint8_t *const mask_top = mask + (2 * row) * mask_stride;
      const uint8_t *const mask_bot = mask + (2 * row + 1) * mask_stride;
      const uint8_t *const in0 = src0 + row * src0_stride;
      const uint8_t *const in1 = src1 + row * src1_stride;
      uint8_t *const out = dst + row * dst_stride;
      for (int col = 0; col < w; ++col) {
        const int mask_sum = mask_top[2 * col] + mask_bot[2 * col] +
                             mask_top[2 * col + 1] + mask_bot[2 * col + 1];
        const int m = ROUND_POWER_OF_TWO(mask_sum, 2);
        out[col] = AOM_BLEND_A64(m, in0[col], in1[col]);
      }
    }
  } else if (subw == 1 && subh == 0) {
    // Mask is twice as wide: average each horizontal pair.
    for (int row = 0; row < h; ++row) {
      const uint8_t *const mask_row = mask + row * mask_stride;
      const uint8_t *const in0 = src0 + row * src0_stride;
      const uint8_t *const in1 = src1 + row * src1_stride;
      uint8_t *const out = dst + row * dst_stride;
      for (int col = 0; col < w; ++col) {
        const int m = AOM_BLEND_AVG(mask_row[2 * col], mask_row[2 * col + 1]);
        out[col] = AOM_BLEND_A64(m, in0[col], in1[col]);
      }
    }
  } else {
    // Remaining case: average each vertical pair of mask values.
    for (int row = 0; row < h; ++row) {
      const uint8_t *const mask_top = mask + (2 * row) * mask_stride;
      const uint8_t *const mask_bot = mask + (2 * row + 1) * mask_stride;
      const uint8_t *const in0 = src0 + row * src0_stride;
      const uint8_t *const in1 = src1 + row * src1_stride;
      uint8_t *const out = dst + row * dst_stride;
      for (int col = 0; col < w; ++col) {
        const int m = AOM_BLEND_AVG(mask_top[col], mask_bot[col]);
        out[col] = AOM_BLEND_A64(m, in0[col], in1[col]);
      }
    }
  }
}
285
286
#if CONFIG_AV1_HIGHBITDEPTH
287
// High-bitdepth counterpart of the masked blend: dst_8, src0_8 and src1_8
// are converted to 16-bit pointers with CONVERT_TO_SHORTPTR and blended with
// AOM_BLEND_A64, using a per-sample weight taken from mask.
//
// subw / subh select how the mask is sampled for each output sample:
//   subw == 0, subh == 0: one mask value per sample.
//   subw == 1, subh == 1: rounded average of a 2x2 group of mask values.
//   subw == 1, subh == 0: AOM_BLEND_AVG of two horizontally adjacent values.
//   any other combination: AOM_BLEND_AVG of two vertically adjacent values.
//
// bd is only checked by an assert (8, 10 or 12); it does not affect the
// computation.
void aom_highbd_blend_a64_mask_c(uint8_t *dst_8, uint32_t dst_stride,
                                 const uint8_t *src0_8, uint32_t src0_stride,
                                 const uint8_t *src1_8, uint32_t src1_stride,
                                 const uint8_t *mask, uint32_t mask_stride,
                                 int w, int h, int subw, int subh, int bd) {
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8);
  const uint16_t *src0 = CONVERT_TO_SHORTPTR(src0_8);
  const uint16_t *src1 = CONVERT_TO_SHORTPTR(src1_8);
  (void)bd;  // Only referenced by the assert below.

  assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
  assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

  assert(h >= 1);
  assert(w >= 1);
  assert(IS_POWER_OF_TWO(h));
  assert(IS_POWER_OF_TWO(w));

  assert(bd == 8 || bd == 10 || bd == 12);

  if (subw == 0 && subh == 0) {
    // Mask has the same resolution as the output.
    for (int row = 0; row < h; ++row) {
      const uint8_t *const mask_row = mask + row * mask_stride;
      const uint16_t *const in0 = src0 + row * src0_stride;
      const uint16_t *const in1 = src1 + row * src1_stride;
      uint16_t *const out = dst + row * dst_stride;
      for (int col = 0; col < w; ++col) {
        const int m = mask_row[col];
        out[col] = AOM_BLEND_A64(m, in0[col], in1[col]);
      }
    }
  } else if (subw == 1 && subh == 1) {
    // Mask is twice as wide and twice as tall: average each 2x2 group.
    for (int row = 0; row < h; ++row) {
      const uint8_t *const mask_top = mask + (2 * row) * mask_stride;
      const uint8_t *const mask_bot = mask + (2 * row + 1) * mask_stride;
      const uint16_t *const in0 = src0 + row * src0_stride;
      const uint16_t *const in1 = src1 + row * src1_stride;
      uint16_t *const out = dst + row * dst_stride;
      for (int col = 0; col < w; ++col) {
        const int mask_sum = mask_top[2 * col] + mask_bot[2 * col] +
                             mask_top[2 * col + 1] + mask_bot[2 * col + 1];
        const int m = ROUND_POWER_OF_TWO(mask_sum, 2);
        out[col] = AOM_BLEND_A64(m, in0[col], in1[col]);
      }
    }
  } else if (subw == 1 && subh == 0) {
    // Mask is twice as wide: average each horizontal pair.
    for (int row = 0; row < h; ++row) {
      const uint8_t *const mask_row = mask + row * mask_stride;
      const uint16_t *const in0 = src0 + row * src0_stride;
      const uint16_t *const in1 = src1 + row * src1_stride;
      uint16_t *const out = dst + row * dst_stride;
      for (int col = 0; col < w; ++col) {
        const int m = AOM_BLEND_AVG(mask_row[2 * col], mask_row[2 * col + 1]);
        out[col] = AOM_BLEND_A64(m, in0[col], in1[col]);
      }
    }
  } else {
    // Remaining case: average each vertical pair of mask values.
    for (int row = 0; row < h; ++row) {
      const uint8_t *const mask_top = mask + (2 * row) * mask_stride;
      const uint8_t *const mask_bot = mask + (2 * row + 1) * mask_stride;
      const uint16_t *const in0 = src0 + row * src0_stride;
      const uint16_t *const in1 = src1 + row * src1_stride;
      uint16_t *const out = dst + row * dst_stride;
      for (int col = 0; col < w; ++col) {
        const int m = AOM_BLEND_AVG(mask_top[col], mask_bot[col]);
        out[col] = AOM_BLEND_A64(m, in0[col], in1[col]);
      }
    }
  }
}
349
#endif  // CONFIG_AV1_HIGHBITDEPTH