Coverage Report

Created: 2025-07-16 07:53

/src/aom/aom_dsp/blend_a64_mask.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <assert.h>
13
14
#include "aom/aom_integer.h"
15
#include "aom_ports/mem.h"
16
#include "aom_dsp/blend.h"
17
#include "aom_dsp/aom_dsp_common.h"
18
19
#include "config/aom_dsp_rtcd.h"
20
21
// Blending with alpha mask. Mask values come from the range [0, 64],
22
// as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can
23
// be the same as dst, or dst can be different from both sources.
24
25
// NOTE(rachelbarker): The input and output of aom_blend_a64_d16_mask_c() are
26
// in a higher intermediate precision, and will later be rounded down to pixel
27
// precision.
28
// Thus, in order to avoid double-rounding, we want to use normal right shifts
29
// within this function, not ROUND_POWER_OF_TWO.
30
// This works because of the identity:
31
// ROUND_POWER_OF_TWO(x >> y, z) == ROUND_POWER_OF_TWO(x, y+z)
32
//
33
// In contrast, the output of the non-d16 functions will not be further rounded,
34
// so we *should* use ROUND_POWER_OF_TWO there.
35
36
void aom_lowbd_blend_a64_d16_mask_c(
37
    uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
38
    uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
39
    const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh,
40
1.15k
    ConvolveParams *conv_params) {
41
1.15k
  int i, j;
42
1.15k
  const int bd = 8;
43
1.15k
  const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
44
1.15k
  const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
45
1.15k
                           (1 << (offset_bits - conv_params->round_1 - 1));
46
1.15k
  const int round_bits =
47
1.15k
      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
48
49
1.15k
  assert(IMPLIES((void *)src0 == dst, src0_stride == dst_stride));
50
1.15k
  assert(IMPLIES((void *)src1 == dst, src1_stride == dst_stride));
51
52
1.15k
  assert(h >= 4);
53
1.15k
  assert(w >= 4);
54
1.15k
  assert(IS_POWER_OF_TWO(h));
55
1.15k
  assert(IS_POWER_OF_TWO(w));
56
57
1.15k
  if (subw == 0 && subh == 0) {
58
15.3k
    for (i = 0; i < h; ++i) {
59
146k
      for (j = 0; j < w; ++j) {
60
131k
        int32_t res;
61
131k
        const int m = mask[i * mask_stride + j];
62
131k
        res = ((m * (int32_t)src0[i * src0_stride + j] +
63
131k
                (AOM_BLEND_A64_MAX_ALPHA - m) *
64
131k
                    (int32_t)src1[i * src1_stride + j]) >>
65
131k
               AOM_BLEND_A64_ROUND_BITS);
66
131k
        res -= round_offset;
67
131k
        dst[i * dst_stride + j] =
68
131k
            clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
69
131k
      }
70
14.4k
    }
71
974
  } else if (subw == 1 && subh == 1) {
72
220
    for (i = 0; i < h; ++i) {
73
880
      for (j = 0; j < w; ++j) {
74
704
        int32_t res;
75
704
        const int m = ROUND_POWER_OF_TWO(
76
704
            mask[(2 * i) * mask_stride + (2 * j)] +
77
704
                mask[(2 * i + 1) * mask_stride + (2 * j)] +
78
704
                mask[(2 * i) * mask_stride + (2 * j + 1)] +
79
704
                mask[(2 * i + 1) * mask_stride + (2 * j + 1)],
80
704
            2);
81
704
        res = ((m * (int32_t)src0[i * src0_stride + j] +
82
704
                (AOM_BLEND_A64_MAX_ALPHA - m) *
83
704
                    (int32_t)src1[i * src1_stride + j]) >>
84
704
               AOM_BLEND_A64_ROUND_BITS);
85
704
        res -= round_offset;
86
704
        dst[i * dst_stride + j] =
87
704
            clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
88
704
      }
89
176
    }
90
136
  } else if (subw == 1 && subh == 0) {
91
1.43k
    for (i = 0; i < h; ++i) {
92
18.4k
      for (j = 0; j < w; ++j) {
93
17.1k
        int32_t res;
94
17.1k
        const int m = AOM_BLEND_AVG(mask[i * mask_stride + (2 * j)],
95
17.1k
                                    mask[i * mask_stride + (2 * j + 1)]);
96
17.1k
        res = ((m * (int32_t)src0[i * src0_stride + j] +
97
17.1k
                (AOM_BLEND_A64_MAX_ALPHA - m) *
98
17.1k
                    (int32_t)src1[i * src1_stride + j]) >>
99
17.1k
               AOM_BLEND_A64_ROUND_BITS);
100
17.1k
        res -= round_offset;
101
17.1k
        dst[i * dst_stride + j] =
102
17.1k
            clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
103
17.1k
      }
104
1.29k
    }
105
136
  } else {
106
0
    for (i = 0; i < h; ++i) {
107
0
      for (j = 0; j < w; ++j) {
108
0
        int32_t res;
109
0
        const int m = AOM_BLEND_AVG(mask[(2 * i) * mask_stride + j],
110
0
                                    mask[(2 * i + 1) * mask_stride + j]);
111
0
        res = ((int32_t)(m * (int32_t)src0[i * src0_stride + j] +
112
0
                         (AOM_BLEND_A64_MAX_ALPHA - m) *
113
0
                             (int32_t)src1[i * src1_stride + j]) >>
114
0
               AOM_BLEND_A64_ROUND_BITS);
115
0
        res -= round_offset;
116
0
        dst[i * dst_stride + j] =
117
0
            clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
118
0
      }
119
0
    }
120
0
  }
121
1.15k
}
122
123
#if CONFIG_AV1_HIGHBITDEPTH
124
void aom_highbd_blend_a64_d16_mask_c(
125
    uint8_t *dst_8, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
126
    uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
127
    const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh,
128
1.95k
    ConvolveParams *conv_params, const int bd) {
129
1.95k
  const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
130
1.95k
  const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
131
1.95k
                           (1 << (offset_bits - conv_params->round_1 - 1));
132
1.95k
  const int round_bits =
133
1.95k
      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
134
1.95k
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8);
135
136
1.95k
  assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
137
1.95k
  assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
138
139
1.95k
  assert(h >= 1);
140
1.95k
  assert(w >= 1);
141
1.95k
  assert(IS_POWER_OF_TWO(h));
142
1.95k
  assert(IS_POWER_OF_TWO(w));
143
144
  // excerpt from clip_pixel_highbd()
145
  // set saturation_value to (1 << bd) - 1
146
1.95k
  unsigned int saturation_value;
147
1.95k
  switch (bd) {
148
0
    case 8:
149
0
    default: saturation_value = 255; break;
150
1.94k
    case 10: saturation_value = 1023; break;
151
15
    case 12: saturation_value = 4095; break;
152
1.95k
  }
153
154
1.95k
  if (subw == 0 && subh == 0) {
155
23.2k
    for (int i = 0; i < h; ++i) {
156
229k
      for (int j = 0; j < w; ++j) {
157
207k
        int32_t res;
158
207k
        const int m = mask[j];
159
207k
        res = ((m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >>
160
207k
               AOM_BLEND_A64_ROUND_BITS);
161
207k
        res -= round_offset;
162
207k
        unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
163
207k
        dst[j] = AOMMIN(v, saturation_value);
164
207k
      }
165
21.7k
      mask += mask_stride;
166
21.7k
      src0 += src0_stride;
167
21.7k
      src1 += src1_stride;
168
21.7k
      dst += dst_stride;
169
21.7k
    }
170
1.47k
  } else if (subw == 1 && subh == 1) {
171
170
    for (int i = 0; i < h; ++i) {
172
680
      for (int j = 0; j < w; ++j) {
173
544
        int32_t res;
174
544
        const int m = ROUND_POWER_OF_TWO(
175
544
            mask[2 * j] + mask[mask_stride + 2 * j] + mask[2 * j + 1] +
176
544
                mask[mask_stride + 2 * j + 1],
177
544
            2);
178
544
        res = (m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >>
179
544
              AOM_BLEND_A64_ROUND_BITS;
180
544
        res -= round_offset;
181
544
        unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
182
544
        dst[j] = AOMMIN(v, saturation_value);
183
544
      }
184
136
      mask += 2 * mask_stride;
185
136
      src0 += src0_stride;
186
136
      src1 += src1_stride;
187
136
      dst += dst_stride;
188
136
    }
189
452
  } else if (subw == 1 && subh == 0) {
190
4.54k
    for (int i = 0; i < h; ++i) {
191
48.1k
      for (int j = 0; j < w; ++j) {
192
44.0k
        int32_t res;
193
44.0k
        const int m = AOM_BLEND_AVG(mask[2 * j], mask[2 * j + 1]);
194
44.0k
        res = (m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >>
195
44.0k
              AOM_BLEND_A64_ROUND_BITS;
196
44.0k
        res -= round_offset;
197
44.0k
        unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
198
44.0k
        dst[j] = AOMMIN(v, saturation_value);
199
44.0k
      }
200
4.09k
      mask += mask_stride;
201
4.09k
      src0 += src0_stride;
202
4.09k
      src1 += src1_stride;
203
4.09k
      dst += dst_stride;
204
4.09k
    }
205
452
  } else {
206
0
    for (int i = 0; i < h; ++i) {
207
0
      for (int j = 0; j < w; ++j) {
208
0
        int32_t res;
209
0
        const int m = AOM_BLEND_AVG(mask[j], mask[mask_stride + j]);
210
0
        res = (m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >>
211
0
              AOM_BLEND_A64_ROUND_BITS;
212
0
        res -= round_offset;
213
0
        unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
214
0
        dst[j] = AOMMIN(v, saturation_value);
215
0
      }
216
0
      mask += 2 * mask_stride;
217
0
      src0 += src0_stride;
218
0
      src1 += src1_stride;
219
0
      dst += dst_stride;
220
0
    }
221
0
  }
222
1.95k
}
223
#endif  // CONFIG_AV1_HIGHBITDEPTH
224
225
// Blending with alpha mask. Mask values come from the range [0, 64],
226
// as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can
227
// be the same as dst, or dst can be different from both sources.
228
229
// Blends two 8-bit sources into an 8-bit destination under a per-pixel alpha
// mask, using AOM_BLEND_A64 for every output sample.
//
//   dst, dst_stride     output buffer and its row stride.
//   src0, src0_stride   first source and its row stride.
//   src1, src1_stride   second source and its row stride.
//   mask, mask_stride   mask buffer and its row stride.
//   w, h                output size; asserted to be powers of two and >= 1.
//   subw, subh          select how the mask is sampled (see the branches).
//
// Both source samples are read before the destination sample is written.
void aom_blend_a64_mask_c(uint8_t *dst, uint32_t dst_stride,
                          const uint8_t *src0, uint32_t src0_stride,
                          const uint8_t *src1, uint32_t src1_stride,
                          const uint8_t *mask, uint32_t mask_stride, int w,
                          int h, int subw, int subh) {
  // A source that aliases the destination must share its stride.
  assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
  assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

  assert(h >= 1);
  assert(w >= 1);
  assert(IS_POWER_OF_TWO(h));
  assert(IS_POWER_OF_TWO(w));

  if (subw == 0 && subh == 0) {
    // One mask sample per output pixel.
    for (int row = 0; row < h; ++row) {
      const uint8_t *const mask_row = mask + row * mask_stride;
      const uint8_t *const a_row = src0 + row * src0_stride;
      const uint8_t *const b_row = src1 + row * src1_stride;
      uint8_t *const out_row = dst + row * dst_stride;
      for (int col = 0; col < w; ++col) {
        const int m = mask_row[col];
        out_row[col] = AOM_BLEND_A64(m, a_row[col], b_row[col]);
      }
    }
  } else if (subw == 1 && subh == 1) {
    // Mask weight is the rounded mean of a 2x2 group of mask samples.
    for (int row = 0; row < h; ++row) {
      const uint8_t *const mask_top = mask + (2 * row) * mask_stride;
      const uint8_t *const mask_bot = mask + (2 * row + 1) * mask_stride;
      const uint8_t *const a_row = src0 + row * src0_stride;
      const uint8_t *const b_row = src1 + row * src1_stride;
      uint8_t *const out_row = dst + row * dst_stride;
      for (int col = 0; col < w; ++col) {
        const int m = ROUND_POWER_OF_TWO(
            mask_top[2 * col] + mask_bot[2 * col] + mask_top[2 * col + 1] +
                mask_bot[2 * col + 1],
            2);
        out_row[col] = AOM_BLEND_A64(m, a_row[col], b_row[col]);
      }
    }
  } else if (subw == 1 && subh == 0) {
    // Mask weight is the average of two horizontally adjacent mask samples.
    for (int row = 0; row < h; ++row) {
      const uint8_t *const mask_row = mask + row * mask_stride;
      const uint8_t *const a_row = src0 + row * src0_stride;
      const uint8_t *const b_row = src1 + row * src1_stride;
      uint8_t *const out_row = dst + row * dst_stride;
      for (int col = 0; col < w; ++col) {
        const int m = AOM_BLEND_AVG(mask_row[2 * col], mask_row[2 * col + 1]);
        out_row[col] = AOM_BLEND_A64(m, a_row[col], b_row[col]);
      }
    }
  } else {
    // Every remaining subw/subh combination lands here: the mask weight is
    // the average of two vertically adjacent mask samples.
    for (int row = 0; row < h; ++row) {
      const uint8_t *const mask_top = mask + (2 * row) * mask_stride;
      const uint8_t *const mask_bot = mask + (2 * row + 1) * mask_stride;
      const uint8_t *const a_row = src0 + row * src0_stride;
      const uint8_t *const b_row = src1 + row * src1_stride;
      uint8_t *const out_row = dst + row * dst_stride;
      for (int col = 0; col < w; ++col) {
        const int m = AOM_BLEND_AVG(mask_top[col], mask_bot[col]);
        out_row[col] = AOM_BLEND_A64(m, a_row[col], b_row[col]);
      }
    }
  }
}
285
286
#if CONFIG_AV1_HIGHBITDEPTH
287
// High-bitdepth counterpart of the alpha-mask blend: blends two 16-bit
// sources into a 16-bit destination, using AOM_BLEND_A64 for every output
// sample.
//
//   dst_8, dst_stride     output buffer and its row stride.
//   src0_8, src0_stride   first source and its row stride.
//   src1_8, src1_stride   second source and its row stride.
//   mask, mask_stride     mask buffer and its row stride.
//   w, h                  output size; asserted to be powers of two and >= 1.
//   subw, subh            select how the mask is sampled (see the branches).
//   bd                    bit depth; only checked by an assert (8, 10 or 12).
//
// The three pixel buffers are passed as uint8_t pointers and converted with
// CONVERT_TO_SHORTPTR before use. Both source samples are read before the
// destination sample is written.
void aom_highbd_blend_a64_mask_c(uint8_t *dst_8, uint32_t dst_stride,
                                 const uint8_t *src0_8, uint32_t src0_stride,
                                 const uint8_t *src1_8, uint32_t src1_stride,
                                 const uint8_t *mask, uint32_t mask_stride,
                                 int w, int h, int subw, int subh, int bd) {
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8);
  const uint16_t *src0 = CONVERT_TO_SHORTPTR(src0_8);
  const uint16_t *src1 = CONVERT_TO_SHORTPTR(src1_8);
  // bd is otherwise referenced only inside an assert.
  (void)bd;

  // A source that aliases the destination must share its stride.
  assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
  assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

  assert(h >= 1);
  assert(w >= 1);
  assert(IS_POWER_OF_TWO(h));
  assert(IS_POWER_OF_TWO(w));

  assert(bd == 8 || bd == 10 || bd == 12);

  if (subw == 0 && subh == 0) {
    // One mask sample per output pixel.
    for (int row = 0; row < h; ++row) {
      const uint8_t *const mask_row = mask + row * mask_stride;
      const uint16_t *const a_row = src0 + row * src0_stride;
      const uint16_t *const b_row = src1 + row * src1_stride;
      uint16_t *const out_row = dst + row * dst_stride;
      for (int col = 0; col < w; ++col) {
        const int m = mask_row[col];
        out_row[col] = AOM_BLEND_A64(m, a_row[col], b_row[col]);
      }
    }
  } else if (subw == 1 && subh == 1) {
    // Mask weight is the rounded mean of a 2x2 group of mask samples.
    for (int row = 0; row < h; ++row) {
      const uint8_t *const mask_top = mask + (2 * row) * mask_stride;
      const uint8_t *const mask_bot = mask + (2 * row + 1) * mask_stride;
      const uint16_t *const a_row = src0 + row * src0_stride;
      const uint16_t *const b_row = src1 + row * src1_stride;
      uint16_t *const out_row = dst + row * dst_stride;
      for (int col = 0; col < w; ++col) {
        const int m = ROUND_POWER_OF_TWO(
            mask_top[2 * col] + mask_bot[2 * col] + mask_top[2 * col + 1] +
                mask_bot[2 * col + 1],
            2);
        out_row[col] = AOM_BLEND_A64(m, a_row[col], b_row[col]);
      }
    }
  } else if (subw == 1 && subh == 0) {
    // Mask weight is the average of two horizontally adjacent mask samples.
    for (int row = 0; row < h; ++row) {
      const uint8_t *const mask_row = mask + row * mask_stride;
      const uint16_t *const a_row = src0 + row * src0_stride;
      const uint16_t *const b_row = src1 + row * src1_stride;
      uint16_t *const out_row = dst + row * dst_stride;
      for (int col = 0; col < w; ++col) {
        const int m = AOM_BLEND_AVG(mask_row[2 * col], mask_row[2 * col + 1]);
        out_row[col] = AOM_BLEND_A64(m, a_row[col], b_row[col]);
      }
    }
  } else {
    // Every remaining subw/subh combination lands here: the mask weight is
    // the average of two vertically adjacent mask samples.
    for (int row = 0; row < h; ++row) {
      const uint8_t *const mask_top = mask + (2 * row) * mask_stride;
      const uint8_t *const mask_bot = mask + (2 * row + 1) * mask_stride;
      const uint16_t *const a_row = src0 + row * src0_stride;
      const uint16_t *const b_row = src1 + row * src1_stride;
      uint16_t *const out_row = dst + row * dst_stride;
      for (int col = 0; col < w; ++col) {
        const int m = AOM_BLEND_AVG(mask_top[col], mask_bot[col]);
        out_row[col] = AOM_BLEND_A64(m, a_row[col], b_row[col]);
      }
    }
  }
}
349
#endif  // CONFIG_AV1_HIGHBITDEPTH