Coverage Report

Created: 2026-04-01 07:49

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/aom/aom_dsp/avg.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <assert.h>
13
#include <stdlib.h>
14
15
#include "config/aom_dsp_rtcd.h"
16
#include "aom_ports/mem.h"
17
18
// Computes the minimum and maximum absolute pixel difference between two
// 8x8 blocks.
// s, d: 8-bit source and reference blocks; p, dp: their row strides.
// *min / *max receive the smallest and largest |s - d| over all 64 pixels.
void aom_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp,
                      int *min, int *max) {
  int lo = 255;  // largest possible |a - b| for two 8-bit samples
  int hi = 0;
  for (int row = 0; row < 8; ++row) {
    for (int col = 0; col < 8; ++col) {
      const int diff = abs(s[col] - d[col]);
      if (diff < lo) lo = diff;
      if (diff > hi) hi = diff;
    }
    s += p;
    d += dp;
  }
  *min = lo;
  *max = hi;
}
31
32
0
// Returns the rounded average of a 4x4 block of 8-bit pixels.
// s: top-left pixel of the block; p: row stride.
unsigned int aom_avg_4x4_c(const uint8_t *s, int p) {
  unsigned int total = 0;
  for (int row = 0; row < 4; ++row) {
    for (int col = 0; col < 4; ++col) total += s[col];
    s += p;
  }
  // Round to nearest: add half of the 16-pixel divisor before shifting.
  return (total + 8) >> 4;
}
41
42
0
// Returns the rounded average of an 8x8 block of 8-bit pixels.
// s: top-left pixel of the block; p: row stride.
unsigned int aom_avg_8x8_c(const uint8_t *s, int p) {
  unsigned int total = 0;
  for (int row = 0; row < 8; ++row) {
    for (int col = 0; col < 8; ++col) total += s[col];
    s += p;
  }
  // Round to nearest: add half of the 64-pixel divisor before shifting.
  return (total + 32) >> 6;
}
51
52
// Computes the rounded average of each of the four 8x8 sub-blocks of the
// 16x16 region whose top-left corner is at (x16_idx, y16_idx), in raster
// order (top-left, top-right, bottom-left, bottom-right).
void aom_avg_8x8_quad_c(const uint8_t *s, int p, int x16_idx, int y16_idx,
                        int *avg) {
  for (int blk = 0; blk < 4; blk++) {
    const int col = x16_idx + (blk & 1) * 8;
    const int row = y16_idx + (blk >> 1) * 8;
    avg[blk] = aom_avg_8x8_c(s + row * p + col, p);
  }
}
61
62
#if CONFIG_AV1_HIGHBITDEPTH
63
0
// Returns the rounded average of an 8x8 block of high-bitdepth pixels.
// s8: CONVERT_TO_BYTEPTR-style pointer to 16-bit samples; p: row stride in
// samples.
unsigned int aom_highbd_avg_8x8_c(const uint8_t *s8, int p) {
  const uint16_t *s = CONVERT_TO_SHORTPTR(s8);
  int sum = 0;
  for (int row = 0; row < 8; ++row) {
    for (int col = 0; col < 8; ++col) sum += s[col];
    s += p;
  }
  // Round to nearest: add half of the 64-pixel divisor before shifting.
  return (sum + 32) >> 6;
}
73
74
0
// Returns the rounded average of a 4x4 block of high-bitdepth pixels.
// s8: CONVERT_TO_BYTEPTR-style pointer to 16-bit samples; p: row stride in
// samples.
unsigned int aom_highbd_avg_4x4_c(const uint8_t *s8, int p) {
  const uint16_t *s = CONVERT_TO_SHORTPTR(s8);
  int sum = 0;
  for (int row = 0; row < 4; ++row) {
    for (int col = 0; col < 4; ++col) sum += s[col];
    s += p;
  }
  // Round to nearest: add half of the 16-pixel divisor before shifting.
  return (sum + 8) >> 4;
}
84
85
// Computes the minimum and maximum absolute pixel difference between two
// 8x8 blocks of high-bitdepth samples.
// s8, d8: CONVERT_TO_BYTEPTR-style pointers; p, dp: row strides in samples.
void aom_highbd_minmax_8x8_c(const uint8_t *s8, int p, const uint8_t *d8,
                             int dp, int *min, int *max) {
  const uint16_t *s = CONVERT_TO_SHORTPTR(s8);
  const uint16_t *d = CONVERT_TO_SHORTPTR(d8);
  int lo = 65535;  // largest possible |a - b| for two 16-bit samples
  int hi = 0;
  for (int row = 0; row < 8; ++row, s += p, d += dp) {
    for (int col = 0; col < 8; ++col) {
      const int diff = abs(s[col] - d[col]);
      if (diff < lo) lo = diff;
      if (diff > hi) hi = diff;
    }
  }
  *min = lo;
  *max = hi;
}
100
#endif  // CONFIG_AV1_HIGHBITDEPTH
101
102
// One 4-point Hadamard butterfly over a strided column of src_diff.
// The sum/difference pairs are halved (>> 1) before the second stage,
// matching the dynamic-range notes in aom_hadamard_4x4_c.
static void hadamard_col4(const int16_t *src_diff, ptrdiff_t src_stride,
                          int16_t *coeff) {
  const int16_t s0 = src_diff[0 * src_stride];
  const int16_t s1 = src_diff[1 * src_stride];
  const int16_t s2 = src_diff[2 * src_stride];
  const int16_t s3 = src_diff[3 * src_stride];

  const int16_t b0 = (s0 + s1) >> 1;
  const int16_t b1 = (s0 - s1) >> 1;
  const int16_t b2 = (s2 + s3) >> 1;
  const int16_t b3 = (s2 - s3) >> 1;

  coeff[0] = b0 + b2;
  coeff[1] = b1 + b3;
  coeff[2] = b0 - b2;
  coeff[3] = b1 - b3;
}
114
115
void aom_hadamard_4x4_c(const int16_t *src_diff, ptrdiff_t src_stride,
116
0
                        tran_low_t *coeff) {
117
0
  int idx;
118
0
  int16_t buffer[16];
119
0
  int16_t buffer2[16];
120
0
  int16_t *tmp_buf = &buffer[0];
121
0
  for (idx = 0; idx < 4; ++idx) {
122
    // src_diff: 9 bit (8b), 13 bit (HBD)
123
    // dynamic range [-255, 255] (8b), [-4095, 4095] (HBD)
124
0
    hadamard_col4(src_diff, src_stride, tmp_buf);
125
0
    tmp_buf += 4;
126
0
    ++src_diff;
127
0
  }
128
129
0
  tmp_buf = &buffer[0];
130
0
  for (idx = 0; idx < 4; ++idx) {
131
    // tmp_buf: 10 bit (8b), 14 bit (HBD)
132
    // dynamic range [-510, 510] (8b), [-8190, 8190] (HBD)
133
    // buffer2: 11 bit (8b), 15 bit (HBD)
134
    // dynamic range [-1020, 1020] (8b), [-16380, 16380] (HBD)
135
0
    hadamard_col4(tmp_buf, 4, buffer2 + 4 * idx);
136
0
    ++tmp_buf;
137
0
  }
138
139
  // Extra transpose to match SSE2 behavior(i.e., aom_hadamard_4x4_sse2).
140
0
  for (int i = 0; i < 4; i++) {
141
0
    for (int j = 0; j < 4; j++) {
142
0
      coeff[i * 4 + j] = (tran_low_t)buffer2[j * 4 + i];
143
0
    }
144
0
  }
145
0
}
146
147
// One 8-point Hadamard butterfly over a strided column (or row).
// src_diff: first pass, 9 bit, dynamic range [-255, 255];
//           second pass, 12 bit, dynamic range [-2040, 2040].
// Outputs are stored in a permuted order that the SIMD versions produce.
static void hadamard_col8(const int16_t *src_diff, ptrdiff_t src_stride,
                          int16_t *coeff) {
  int16_t b[8];
  // Stage 1: sum/difference of adjacent input pairs.
  for (int k = 0; k < 8; k += 2) {
    const int16_t u = src_diff[k * src_stride];
    const int16_t v = src_diff[(k + 1) * src_stride];
    b[k] = u + v;
    b[k + 1] = u - v;
  }

  // Stage 2.
  const int16_t c0 = b[0] + b[2];
  const int16_t c1 = b[1] + b[3];
  const int16_t c2 = b[0] - b[2];
  const int16_t c3 = b[1] - b[3];
  const int16_t c4 = b[4] + b[6];
  const int16_t c5 = b[5] + b[7];
  const int16_t c6 = b[4] - b[6];
  const int16_t c7 = b[5] - b[7];

  // Stage 3 with the permuted store order.
  coeff[0] = c0 + c4;
  coeff[7] = c1 + c5;
  coeff[3] = c2 + c6;
  coeff[4] = c3 + c7;
  coeff[2] = c0 - c4;
  coeff[6] = c1 - c5;
  coeff[1] = c2 - c6;
  coeff[5] = c3 - c7;
}
178
179
void aom_hadamard_8x8_c(const int16_t *src_diff, ptrdiff_t src_stride,
180
0
                        tran_low_t *coeff) {
181
0
  int idx;
182
0
  int16_t buffer[64];
183
0
  int16_t buffer2[64];
184
0
  int16_t *tmp_buf = &buffer[0];
185
0
  for (idx = 0; idx < 8; ++idx) {
186
0
    hadamard_col8(src_diff, src_stride, tmp_buf);  // src_diff: 9 bit
187
                                                   // dynamic range [-255, 255]
188
0
    tmp_buf += 8;
189
0
    ++src_diff;
190
0
  }
191
192
0
  tmp_buf = &buffer[0];
193
0
  for (idx = 0; idx < 8; ++idx) {
194
0
    hadamard_col8(tmp_buf, 8, buffer2 + 8 * idx);  // tmp_buf: 12 bit
195
    // dynamic range [-2040, 2040]
196
    // buffer2: 15 bit
197
    // dynamic range [-16320, 16320]
198
0
    ++tmp_buf;
199
0
  }
200
201
  // Extra transpose to match SSE2 behavior(i.e., aom_hadamard_8x8_sse2).
202
0
  for (int i = 0; i < 8; i++) {
203
0
    for (int j = 0; j < 8; j++) {
204
0
      coeff[i * 8 + j] = (tran_low_t)buffer2[j * 8 + i];
205
0
    }
206
0
  }
207
0
}
208
209
// Low-precision 8x8 2D Hadamard transform with int16_t output: a column
// pass, a row pass, and a transpose that matches aom_hadamard_lp_8x8_sse2.
// src_diff: residual samples, 9 bit, dynamic range [-255, 255].
// coeff: 64 output coefficients, 15 bit, dynamic range [-16320, 16320].
void aom_hadamard_lp_8x8_c(const int16_t *src_diff, ptrdiff_t src_stride,
                           int16_t *coeff) {
  int16_t buffer[64];
  int16_t buffer2[64];
  int16_t *tmp_buf = &buffer[0];
  for (int idx = 0; idx < 8; ++idx) {
    hadamard_col8(src_diff, src_stride, tmp_buf);  // src_diff: 9 bit
                                                   // dynamic range [-255, 255]
    tmp_buf += 8;
    ++src_diff;
  }

  tmp_buf = &buffer[0];
  for (int idx = 0; idx < 8; ++idx) {
    hadamard_col8(tmp_buf, 8, buffer2 + 8 * idx);  // tmp_buf: 12 bit
    // dynamic range [-2040, 2040]
    // buffer2: 15 bit
    // dynamic range [-16320, 16320]
    ++tmp_buf;
  }

  // Fix: the previous version first copied all 64 buffer2 values into coeff
  // and then overwrote every one of them in the transpose below; that dead
  // copy loop has been removed.
  // Extra transpose to match SSE2 behavior (i.e., aom_hadamard_lp_8x8_sse2).
  for (int i = 0; i < 8; i++) {
    for (int j = 0; j < 8; j++) {
      coeff[i * 8 + j] = buffer2[j * 8 + i];
    }
  }
}
239
240
// Applies the low-precision 8x8 Hadamard transform to two horizontally
// adjacent 8x8 blocks, writing 64 coefficients per block.
void aom_hadamard_lp_8x8_dual_c(const int16_t *src_diff, ptrdiff_t src_stride,
                                int16_t *coeff) {
  aom_hadamard_lp_8x8_c(src_diff, src_stride, coeff);
  aom_hadamard_lp_8x8_c(src_diff + 8, src_stride, coeff + 64);
}
247
248
// In place 16x16 2D Hadamard transform.
// Computes four 8x8 Hadamard transforms on the quadrants of the 16x16
// block, then combines the four 64-coefficient groups with one more
// butterfly stage, working in place on `coeff`, and finally reorders
// columns to match the AVX2 implementation's output layout.
void aom_hadamard_16x16_c(const int16_t *src_diff, ptrdiff_t src_stride,
                          tran_low_t *coeff) {
  int idx;
  for (idx = 0; idx < 4; ++idx) {
    // src_diff: 9 bit, dynamic range [-255, 255]
    // Quadrant order: top-left, top-right, bottom-left, bottom-right.
    const int16_t *src_ptr =
        src_diff + (idx >> 1) * 8 * src_stride + (idx & 0x01) * 8;
    aom_hadamard_8x8_c(src_ptr, src_stride, coeff + idx * 64);
  }

  // coeff: 15 bit, dynamic range [-16320, 16320]
  // Cross-quadrant butterfly: element idx of each 64-coefficient group is
  // combined with its counterparts at offsets 64, 128 and 192.
  for (idx = 0; idx < 64; ++idx) {
    tran_low_t a0 = coeff[0];
    tran_low_t a1 = coeff[64];
    tran_low_t a2 = coeff[128];
    tran_low_t a3 = coeff[192];

    tran_low_t b0 = (a0 + a1) >> 1;  // (a0 + a1): 16 bit, [-32640, 32640]
    tran_low_t b1 = (a0 - a1) >> 1;  // b0-b3: 15 bit, dynamic range
    tran_low_t b2 = (a2 + a3) >> 1;  // [-16320, 16320]
    tran_low_t b3 = (a2 - a3) >> 1;

    coeff[0] = b0 + b2;  // 16 bit, [-32640, 32640]
    coeff[64] = b1 + b3;
    coeff[128] = b0 - b2;
    coeff[192] = b1 - b3;

    ++coeff;
  }

  // Rewind coeff: the loop above advanced it by 64.
  coeff -= 64;
  // Extra shift to match AVX2 output (i.e., aom_hadamard_16x16_avx2).
  // Note that to match SSE2 output, it does not need this step.
  // Swaps entries 4..7 with entries 8..11 of every 16-coefficient row.
  for (int i = 0; i < 16; i++) {
    for (int j = 0; j < 4; j++) {
      tran_low_t temp = coeff[i * 16 + 4 + j];
      coeff[i * 16 + 4 + j] = coeff[i * 16 + 8 + j];
      coeff[i * 16 + 8 + j] = temp;
    }
  }
}
290
291
// Low-precision in-place 16x16 2D Hadamard transform (int16_t output).
// Same structure as aom_hadamard_16x16_c: four 8x8 transforms on the
// quadrants, then a cross-quadrant butterfly working in place on `coeff`.
void aom_hadamard_lp_16x16_c(const int16_t *src_diff, ptrdiff_t src_stride,
                             int16_t *coeff) {
  for (int idx = 0; idx < 4; ++idx) {
    // src_diff: 9 bit, dynamic range [-255, 255]
    // Quadrant order: top-left, top-right, bottom-left, bottom-right.
    const int16_t *src_ptr =
        src_diff + (idx >> 1) * 8 * src_stride + (idx & 0x01) * 8;
    aom_hadamard_lp_8x8_c(src_ptr, src_stride, coeff + idx * 64);
  }

  // Cross-quadrant butterfly over the four 64-coefficient groups.
  for (int idx = 0; idx < 64; ++idx) {
    int16_t a0 = coeff[0];
    int16_t a1 = coeff[64];
    int16_t a2 = coeff[128];
    int16_t a3 = coeff[192];

    int16_t b0 = (a0 + a1) >> 1;  // (a0 + a1): 16 bit, [-32640, 32640]
    int16_t b1 = (a0 - a1) >> 1;  // b0-b3: 15 bit, dynamic range
    int16_t b2 = (a2 + a3) >> 1;  // [-16320, 16320]
    int16_t b3 = (a2 - a3) >> 1;

    coeff[0] = b0 + b2;  // 16 bit, [-32640, 32640]
    coeff[64] = b1 + b3;
    coeff[128] = b0 - b2;
    coeff[192] = b1 - b3;

    ++coeff;
  }
}
319
320
// In-place 32x32 2D Hadamard transform: four 16x16 transforms on the
// quadrants, then a cross-quadrant butterfly (with a >> 2 normalization)
// over the four 256-coefficient groups.
void aom_hadamard_32x32_c(const int16_t *src_diff, ptrdiff_t src_stride,
                          tran_low_t *coeff) {
  int idx;
  for (idx = 0; idx < 4; ++idx) {
    // src_diff: 9 bit, dynamic range [-255, 255]
    // Quadrant order: top-left, top-right, bottom-left, bottom-right.
    const int16_t *src_ptr =
        src_diff + (idx >> 1) * 16 * src_stride + (idx & 0x01) * 16;
    aom_hadamard_16x16_c(src_ptr, src_stride, coeff + idx * 256);
  }

  // coeff: 16 bit, dynamic range [-32768, 32767]
  for (idx = 0; idx < 256; ++idx) {
    tran_low_t a0 = coeff[0];
    tran_low_t a1 = coeff[256];
    tran_low_t a2 = coeff[512];
    tran_low_t a3 = coeff[768];

    tran_low_t b0 = (a0 + a1) >> 2;  // (a0 + a1): 17 bit, [-65536, 65535]
    tran_low_t b1 = (a0 - a1) >> 2;  // b0-b3: 15 bit, dynamic range
    tran_low_t b2 = (a2 + a3) >> 2;  // [-16384, 16383]
    tran_low_t b3 = (a2 - a3) >> 2;

    coeff[0] = b0 + b2;  // 16 bit, [-32768, 32767]
    coeff[256] = b1 + b3;
    coeff[512] = b0 - b2;
    coeff[768] = b1 - b3;

    ++coeff;
  }
}
350
351
#if CONFIG_AV1_HIGHBITDEPTH
352
// First-pass 8-point butterfly for the high-bitdepth 8x8 Hadamard.
// src_diff: 13-bit residual; output: 16 bit, dynamic range [-32760, 32760].
// Outputs use the same permuted store order as hadamard_col8.
static void hadamard_highbd_col8_first_pass(const int16_t *src_diff,
                                            ptrdiff_t src_stride,
                                            int16_t *coeff) {
  int16_t stage1[8];
  // Stage 1: sum/difference of adjacent input pairs.
  for (int k = 0; k < 8; k += 2) {
    const int16_t u = src_diff[k * src_stride];
    const int16_t v = src_diff[(k + 1) * src_stride];
    stage1[k] = u + v;
    stage1[k + 1] = u - v;
  }

  // Stage 2.
  const int16_t c0 = stage1[0] + stage1[2];
  const int16_t c1 = stage1[1] + stage1[3];
  const int16_t c2 = stage1[0] - stage1[2];
  const int16_t c3 = stage1[1] - stage1[3];
  const int16_t c4 = stage1[4] + stage1[6];
  const int16_t c5 = stage1[5] + stage1[7];
  const int16_t c6 = stage1[4] - stage1[6];
  const int16_t c7 = stage1[5] - stage1[7];

  // Stage 3 with the permuted store order.
  coeff[0] = c0 + c4;
  coeff[7] = c1 + c5;
  coeff[3] = c2 + c6;
  coeff[4] = c3 + c7;
  coeff[2] = c0 - c4;
  coeff[6] = c1 - c5;
  coeff[1] = c2 - c6;
  coeff[5] = c3 - c7;
}
382
383
// Second-pass 8-point butterfly for the high-bitdepth 8x8 Hadamard.
// src_diff: 16 bit, dynamic range [-32760, 32760].
// coeff: 19 bit (int32_t), same permuted store order as the first pass.
static void hadamard_highbd_col8_second_pass(const int16_t *src_diff,
                                             ptrdiff_t src_stride,
                                             int32_t *coeff) {
  int32_t stage1[8];
  // Stage 1: sum/difference of adjacent input pairs, widened to 32 bits.
  for (int k = 0; k < 8; k += 2) {
    const int32_t u = src_diff[k * src_stride];
    const int32_t v = src_diff[(k + 1) * src_stride];
    stage1[k] = u + v;
    stage1[k + 1] = u - v;
  }

  // Stage 2.
  const int32_t c0 = stage1[0] + stage1[2];
  const int32_t c1 = stage1[1] + stage1[3];
  const int32_t c2 = stage1[0] - stage1[2];
  const int32_t c3 = stage1[1] - stage1[3];
  const int32_t c4 = stage1[4] + stage1[6];
  const int32_t c5 = stage1[5] + stage1[7];
  const int32_t c6 = stage1[4] - stage1[6];
  const int32_t c7 = stage1[5] - stage1[7];

  // Stage 3 with the permuted store order.
  coeff[0] = c0 + c4;
  coeff[7] = c1 + c5;
  coeff[3] = c2 + c6;
  coeff[4] = c3 + c7;
  coeff[2] = c0 - c4;
  coeff[6] = c1 - c5;
  coeff[1] = c2 - c6;
  coeff[5] = c3 - c7;
}
415
416
// The order of the output coeff of the hadamard is not important. For
417
// optimization purposes the final transpose may be skipped.
418
void aom_highbd_hadamard_8x8_c(const int16_t *src_diff, ptrdiff_t src_stride,
419
0
                               tran_low_t *coeff) {
420
0
  int idx;
421
0
  int16_t buffer[64];
422
0
  int32_t buffer2[64];
423
0
  int16_t *tmp_buf = &buffer[0];
424
0
  for (idx = 0; idx < 8; ++idx) {
425
    // src_diff: 13 bit
426
    // buffer: 16 bit, dynamic range [-32760, 32760]
427
0
    hadamard_highbd_col8_first_pass(src_diff, src_stride, tmp_buf);
428
0
    tmp_buf += 8;
429
0
    ++src_diff;
430
0
  }
431
432
0
  tmp_buf = &buffer[0];
433
0
  for (idx = 0; idx < 8; ++idx) {
434
    // buffer: 16 bit
435
    // buffer2: 19 bit, dynamic range [-262080, 262080]
436
0
    hadamard_highbd_col8_second_pass(tmp_buf, 8, buffer2 + 8 * idx);
437
0
    ++tmp_buf;
438
0
  }
439
440
0
  for (idx = 0; idx < 64; ++idx) coeff[idx] = (tran_low_t)buffer2[idx];
441
0
}
442
443
// In place 16x16 2D Hadamard transform (high bitdepth).
// Four 8x8 transforms on the quadrants, then a cross-quadrant butterfly
// over the four 64-coefficient groups, working in place on `coeff`.
void aom_highbd_hadamard_16x16_c(const int16_t *src_diff, ptrdiff_t src_stride,
                                 tran_low_t *coeff) {
  int idx;
  for (idx = 0; idx < 4; ++idx) {
    // src_diff: 13 bit, dynamic range [-4095, 4095]
    // Quadrant order: top-left, top-right, bottom-left, bottom-right.
    const int16_t *src_ptr =
        src_diff + (idx >> 1) * 8 * src_stride + (idx & 0x01) * 8;
    aom_highbd_hadamard_8x8_c(src_ptr, src_stride, coeff + idx * 64);
  }

  // coeff: 19 bit, dynamic range [-262080, 262080]
  for (idx = 0; idx < 64; ++idx) {
    tran_low_t a0 = coeff[0];
    tran_low_t a1 = coeff[64];
    tran_low_t a2 = coeff[128];
    tran_low_t a3 = coeff[192];

    tran_low_t b0 = (a0 + a1) >> 1;
    tran_low_t b1 = (a0 - a1) >> 1;
    tran_low_t b2 = (a2 + a3) >> 1;
    tran_low_t b3 = (a2 - a3) >> 1;

    // new coeff dynamic range: 20 bit
    coeff[0] = b0 + b2;
    coeff[64] = b1 + b3;
    coeff[128] = b0 - b2;
    coeff[192] = b1 - b3;

    ++coeff;
  }
}
475
476
// In-place 32x32 2D Hadamard transform (high bitdepth).
// Four 16x16 transforms on the quadrants, then a cross-quadrant butterfly
// (with a >> 2 normalization) over the four 256-coefficient groups.
void aom_highbd_hadamard_32x32_c(const int16_t *src_diff, ptrdiff_t src_stride,
                                 tran_low_t *coeff) {
  int idx;
  for (idx = 0; idx < 4; ++idx) {
    // src_diff: 13 bit, dynamic range [-4095, 4095]
    // Quadrant order: top-left, top-right, bottom-left, bottom-right.
    const int16_t *src_ptr =
        src_diff + (idx >> 1) * 16 * src_stride + (idx & 0x01) * 16;
    aom_highbd_hadamard_16x16_c(src_ptr, src_stride, coeff + idx * 256);
  }

  // coeff: 20 bit
  for (idx = 0; idx < 256; ++idx) {
    tran_low_t a0 = coeff[0];
    tran_low_t a1 = coeff[256];
    tran_low_t a2 = coeff[512];
    tran_low_t a3 = coeff[768];

    tran_low_t b0 = (a0 + a1) >> 2;
    tran_low_t b1 = (a0 - a1) >> 2;
    tran_low_t b2 = (a2 + a3) >> 2;
    tran_low_t b3 = (a2 - a3) >> 2;

    // new coeff dynamic range: 20 bit
    coeff[0] = b0 + b2;
    coeff[256] = b1 + b3;
    coeff[512] = b0 - b2;
    coeff[768] = b1 - b3;

    ++coeff;
  }
}
507
#endif  // CONFIG_AV1_HIGHBITDEPTH
508
509
// coeff: 20 bits, dynamic range [-524287, 524287].
510
// length: value range {16, 32, 64, 128, 256, 512, 1024}.
511
0
int aom_satd_c(const tran_low_t *coeff, int length) {
512
0
  int i;
513
0
  int satd = 0;
514
0
  for (i = 0; i < length; ++i) satd += abs(coeff[i]);
515
516
  // satd: 30 bits, dynamic range [-524287 * 1024, 524287 * 1024]
517
0
  return satd;
518
0
}
519
520
0
// Low-precision SATD: sum of absolute 16-bit coefficients.
int aom_satd_lp_c(const int16_t *coeff, int length) {
  int total = 0;
  for (int i = 0; i < length; ++i) total += abs(coeff[i]);

  // total: 26 bits, dynamic range [-32640 * 1024, 32640 * 1024]
  return total;
}
527
528
// Integer projection onto row vectors: each output entry is the normalized
// sum of one column of the reference block.
// height: value range {16, 32, 64, 128}.
void aom_int_pro_row_c(int16_t *hbuf, const uint8_t *ref, const int ref_stride,
                       const int width, const int height, int norm_factor) {
  assert(height >= 2);
  for (int col = 0; col < width; ++col) {
    // Column sum fits in int16_t: at most 128 * 255 = 32640.
    int16_t acc = 0;
    for (int row = 0; row < height; ++row) acc += ref[row * ref_stride];
    hbuf[col] = acc >> norm_factor;
    ++ref;
  }
}
542
543
// Integer projection onto column vectors: each output entry is the
// normalized sum of one row of the reference block.
// width: value range {16, 32, 64, 128}.
void aom_int_pro_col_c(int16_t *vbuf, const uint8_t *ref, const int ref_stride,
                       const int width, const int height, int norm_factor) {
  for (int row = 0; row < height; ++row, ref += ref_stride) {
    // Row sum fits in int16_t: at most 128 * 255 = 32640.
    int16_t acc = 0;
    for (int col = 0; col < width; ++col) acc += ref[col];
    vbuf[row] = acc >> norm_factor;
  }
}
554
555
// Scaled variance of the difference between two integer-projection vectors.
// ref: [0 - 510]
// src: [0 - 510]
// bwl: {2, 3, 4, 5}; the vector length is 4 << bwl.
int aom_vector_var_c(const int16_t *ref, const int16_t *src, int bwl) {
  const int width = 4 << bwl;
  int sse = 0;
  int mean = 0;

  for (int i = 0; i < width; ++i) {
    const int diff = ref[i] - src[i];  // [-510, 510], 10 bits
    mean += diff;                      // dynamic range 16 bits
    sse += diff * diff;                // dynamic range 26 bits
  }

  // If width == 128 the mean can reach 510 * 128 = 65280 and its square
  // would overflow a signed 32-bit int, so square it as unsigned.
  const unsigned int mean_abs = abs(mean);
  return sse - (int)((mean_abs * mean_abs) >> (bwl + 2));
}