/src/aom/av1/encoder/wedge_utils.c

Source (jump to first uncovered line)
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <assert.h>

#include "aom/aom_integer.h"

#include "aom_ports/mem.h"

#include "aom_dsp/aom_dsp_common.h"

#include "av1/common/reconinter.h"

#define MAX_MASK_VALUE (1 << WEDGE_WEIGHT_BITS)

/**
 * SSE of a compound predictor that blends two fundamental predictors, p0 and
 * p1, with a mask.
 *
 * r1:  Residuals of p1, i.e. (source - p1).
 * d:   Difference of the predictors, i.e. (p1 - p0).
 * m:   The blending mask.
 * N:   Number of pixels.
 *
 * 'r1', 'd', and 'm' are contiguous.
 *
 * The value accumulated is
 *   Sum((MAX_MASK_VALUE*r1 + mask*d)**2)
 * which, with r0 = (source - p0) and r1 = (source - p1), equals
 *   Sum((mask*r0 + (MAX_MASK_VALUE-mask)*r1)**2)
 * which in turn equals
 *   Sum((source*MAX_MASK_VALUE - (mask*p0 + (MAX_MASK_VALUE-mask)*p1))**2),
 * i.e. the SSE of the compound predictor's residuals scaled up by
 * MAX_MASK_VALUE**2. The scale factor is removed again (with rounding) before
 * the sum is returned.
 *
 * Each per-pixel term is clamped to the signed 16 bit range before squaring.
 * This is to facilitate an equivalent SIMD implementation. The clamp has no
 * effect while residuals fit in 16 - WEDGE_WEIGHT_BITS (=10) signed bits,
 * which always holds for 8 bit input; on real input it should hold
 * practically always, as residuals are expected to be small.
 */
uint64_t av1_wedge_sse_from_residuals_c(const int16_t *r1, const int16_t *d,
                                        const uint8_t *m, int N) {
  uint64_t sum_sq = 0;
  int px = 0;

  while (px < N) {
    const int32_t scaled_r1 = MAX_MASK_VALUE * r1[px];
    const int32_t masked_d = m[px] * d[px];
    /* Saturate the blended residual to 16 bits signed before squaring. */
    const int32_t term = clamp(scaled_r1 + masked_d, INT16_MIN, INT16_MAX);

    sum_sq += term * term;
    ++px;
  }

  /* Undo the MAX_MASK_VALUE**2 scaling of the accumulated sum. */
  return ROUND_POWER_OF_TWO(sum_sq, 2 * WEDGE_WEIGHT_BITS);
}

/**
 * Choose the mask sign for a compound predictor.
 *
 * ds:    Difference of the squares of the residuals.
 *        r0**2 - r1**2
 * m:     The blending mask
 * N:     Number of pixels. A value <= 0 is treated as an empty input: no
 *        element of 'ds' or 'm' is read and the sum is 0.
 * limit: Pre-computed threshold value.
 *        MAX_MASK_VALUE/2 * (sum(r0**2) - sum(r1**2))
 *
 * 'ds' and 'm' are contiguous.
 *
 * Returns 1 if the negated mask has lower SSE compared to the positive
 * mask, 0 otherwise. Computation is based on:
 *  Sum((mask*r0 + (MAX_MASK_VALUE-mask)*r1)**2)
 *                                     >
 *                                Sum(((MAX_MASK_VALUE-mask)*r0 + mask*r1)**2)
 *
 *  which can be simplified to:
 *
 *  Sum(mask*(r0**2 - r1**2)) > MAX_MASK_VALUE/2 * (sum(r0**2) - sum(r1**2))
 *
 *  The right hand side does not depend on the mask, and needs to be passed as
 *  the 'limit' parameter.
 *
 *  After pre-computing (r0**2 - r1**2), which is passed in as 'ds', the left
 *  hand side is simply a scalar product between an int16_t and uint8_t vector.
 *
 *  Note that for efficiency, ds is stored on 16 bits. Real input residuals
 *  being small, this should not cause a noticeable issue.
 */
int8_t av1_wedge_sign_from_residuals_c(const int16_t *ds, const uint8_t *m,
                                       int N, int64_t limit) {
  int64_t acc = 0;
  int i;

  // A counted loop is used instead of do/while so that N <= 0 reads nothing;
  // a do/while would touch ds[0]/m[0] and then keep decrementing N below zero.
  for (i = 0; i < N; i++) {
    // int16_t * uint8_t is evaluated as int; the product is far below INT_MAX
    // in magnitude, and the running sum is kept in 64 bits.
    acc += ds[i] * m[i];
  }

  return acc > limit;
}

/**
 * Element-wise difference of the squares of two arrays.
 *
 * d: Output array; d[i] receives a[i]**2 - b[i]**2
 * a: First input array
 * b: Second input array
 * N: Number of elements
 *
 * 'd', 'a', and 'b' are contiguous.
 *
 * Each result is saturated to the signed 16 bit range before being stored.
 */
void av1_wedge_compute_delta_squares_c(int16_t *d, const int16_t *a,
                                       const int16_t *b, int N) {
  int k;

  for (k = 0; k < N; ++k) {
    /* Both squares are computed in int before the subtraction. */
    const int a_sq = a[k] * a[k];
    const int b_sq = b[k] * b[k];

    d[k] = clamp(a_sq - b_sq, INT16_MIN, INT16_MAX);
  }
}

Line	Count	Source (jump to first uncovered line)
1		/*
2		* Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3		*
4		* This source code is subject to the terms of the BSD 2 Clause License and
5		* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6		* was not distributed with this source code in the LICENSE file, you can
7		* obtain it at www.aomedia.org/license/software. If the Alliance for Open
8		* Media Patent License 1.0 was not distributed with this source code in the
9		* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10		*/
11
12		#include <assert.h>
13
14		#include "aom/aom_integer.h"
15
16		#include "aom_ports/mem.h"
17
18		#include "aom_dsp/aom_dsp_common.h"
19
20		#include "av1/common/reconinter.h"
21
22	0	#define MAX_MASK_VALUE (1 << WEDGE_WEIGHT_BITS)
23
24		/**
25		* Computes SSE of a compound predictor constructed from 2 fundamental
26		* predictors p0 and p1 using blending with mask.
27		*
28		* r1: Residuals of p1.
29		* (source - p1)
30		* d: Difference of p1 and p0.
31		* (p1 - p0)
32		* m: The blending mask
33		* N: Number of pixels
34		*
35		* 'r1', 'd', and 'm' are contiguous.
36		*
37		* Computes:
38		* Sum((MAX_MASK_VALUE*r1 + mask*d)**2), which is equivalent to:
39		* Sum((mask*r0 + (MAX_MASK_VALUE-mask)*r1)**2),
40		* where r0 is (source - p0), and r1 is (source - p1), which in turn
41		* is equivalent to:
42		* Sum((source*MAX_MASK_VALUE - (mask*p0 + (MAX_MASK_VALUE-mask)*p1))**2),
43		* which is the SSE of the residuals of the compound predictor scaled up by
44		* MAX_MASK_VALUE**2.
45		*
46		* Note that we clamp the partial term in the loop to 16 bits signed. This is
47		* to facilitate equivalent SIMD implementation. It should have no effect if
48		* residuals are within 16 - WEDGE_WEIGHT_BITS (=10) signed, which always
49		* holds for 8 bit input, and on real input, it should hold practically always,
50		* as residuals are expected to be small.
51		*/
52		uint64_t av1_wedge_sse_from_residuals_c(const int16_t *r1, const int16_t *d,
53	0	const uint8_t *m, int N) {
54	0	uint64_t csse = 0;
55	0	int i;
56
57	0	for (i = 0; i < N; i++) {
58	0	int32_t t = MAX_MASK_VALUE * r1[i] + m[i] * d[i];
59	0	t = clamp(t, INT16_MIN, INT16_MAX);
60	0	csse += t * t;
61	0	}
62	0	return ROUND_POWER_OF_TWO(csse, 2 * WEDGE_WEIGHT_BITS);
63	0	}
64
65		/**
66		* Choose the mask sign for a compound predictor.
67		*
68		* ds: Difference of the squares of the residuals.
69		* r0**2 - r1**2
70		* m: The blending mask
71		* N: Number of pixels
72		* limit: Pre-computed threshold value.
73		* MAX_MASK_VALUE/2 * (sum(r0**2) - sum(r1**2))
74		*
75		* 'ds' and 'm' are contiguous.
76		*
77		* Returns true if the negated mask has lower SSE compared to the positive
78		* mask. Computation is based on:
79		* Sum((mask*r0 + (MAX_MASK_VALUE-mask)*r1)**2)
80		* >
81		* Sum(((MAX_MASK_VALUE-mask)*r0 + mask*r1)**2)
82		*
83		* which can be simplified to:
84		*
85		* Sum(mask*(r0**2 - r1**2)) > MAX_MASK_VALUE/2 * (sum(r0**2) - sum(r1**2))
86		*
87		* The right hand side does not depend on the mask, and needs to be passed as
88		* the 'limit' parameter.
89		*
90		* After pre-computing (r0**2 - r1**2), which is passed in as 'ds', the left
91		* hand side is simply a scalar product between an int16_t and uint8_t vector.
92		*
93		* Note that for efficiency, ds is stored on 16 bits. Real input residuals
94		* being small, this should not cause a noticeable issue.
95		*/
96		int8_t av1_wedge_sign_from_residuals_c(const int16_t *ds, const uint8_t *m,
97	0	int N, int64_t limit) {
98	0	int64_t acc = 0;
99
100	0	do {
101	0	acc += *ds++ * *m++;
102	0	} while (--N);
103
104	0	return acc > limit;
105	0	}
106
107		/**
108		* Compute the element-wise difference of the squares of 2 arrays.
109		*
110		* d: Difference of the squares of the inputs: a**2 - b**2
111		* a: First input array
112		* b: Second input array
113		* N: Number of elements
114		*
115		* 'd', 'a', and 'b' are contiguous.
116		*
117		* The result is saturated to signed 16 bits.
118		*/
119		void av1_wedge_compute_delta_squares_c(int16_t *d, const int16_t *a,
120	0	const int16_t *b, int N) {
121	0	int i;
122
123	0	for (i = 0; i < N; i++)
124	0	d[i] = clamp(a[i] * a[i] - b[i] * b[i], INT16_MIN, INT16_MAX);
125	0	}

Coverage Report

Created: 2025-06-22 08:04