Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2016, Alliance for Open Media. All rights reserved |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
10 | | */ |
11 | | |
12 | | #include <stdlib.h> |
13 | | |
14 | | #include "config/aom_dsp_rtcd.h" |
15 | | #include "aom_ports/mem.h" |
16 | | |
// Finds the smallest and largest absolute sample difference between two 8x8
// blocks of 8-bit samples.
//   s, p   : first block and its row stride (in samples).
//   d, dp  : second block and its row stride (in samples).
//   min/max: receive the smallest / largest |s - d| over the 64 positions.
void aom_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp,
                      int *min, int *max) {
  // 255 is the largest difference two 8-bit samples can have, so it is a safe
  // starting point for the running minimum.
  *min = 255;
  *max = 0;
  for (int row = 0; row < 8; ++row) {
    const uint8_t *s_row = s + row * p;
    const uint8_t *d_row = d + row * dp;
    for (int col = 0; col < 8; ++col) {
      const int delta = abs(s_row[col] - d_row[col]);
      if (delta < *min) *min = delta;
      if (delta > *max) *max = delta;
    }
  }
}
30 | | |
// Returns the rounded average of a 4x4 block of 8-bit samples.
//   s: top-left sample of the block.
//   p: row stride in samples.
unsigned int aom_avg_4x4_c(const uint8_t *s, int p) {
  int total = 0;
  for (int row = 0; row < 4; ++row) {
    const uint8_t *line = s + row * p;
    for (int col = 0; col < 4; ++col) {
      total += line[col];
    }
  }

  // 16 samples: add half the divisor for rounding, then divide by 16.
  return (total + 8) >> 4;
}
40 | | |
// Returns the rounded average of an 8x8 block of 8-bit samples.
//   s: top-left sample of the block.
//   p: row stride in samples.
unsigned int aom_avg_8x8_c(const uint8_t *s, int p) {
  int total = 0;
  for (int row = 0; row < 8; ++row) {
    const uint8_t *line = s + row * p;
    for (int col = 0; col < 8; ++col) {
      total += line[col];
    }
  }

  // 64 samples: add half the divisor for rounding, then divide by 64.
  return (total + 32) >> 6;
}
50 | | |
51 | | #if CONFIG_AV1_HIGHBITDEPTH |
// Returns the rounded average of an 8x8 block of 16-bit samples.
//   s8: block pointer; converted to a uint16_t pointer with
//       CONVERT_TO_SHORTPTR before use.
//   p : row stride, applied to the uint16_t pointer (i.e. in samples).
unsigned int aom_highbd_avg_8x8_c(const uint8_t *s8, int p) {
  const uint16_t *line = CONVERT_TO_SHORTPTR(s8);
  int total = 0;
  for (int row = 0; row < 8; ++row, line += p) {
    for (int col = 0; col < 8; ++col) {
      total += line[col];
    }
  }

  // 64 samples: add half the divisor for rounding, then divide by 64.
  return (total + 32) >> 6;
}
62 | | |
// Returns the rounded average of a 4x4 block of 16-bit samples.
//   s8: block pointer; converted to a uint16_t pointer with
//       CONVERT_TO_SHORTPTR before use.
//   p : row stride, applied to the uint16_t pointer (i.e. in samples).
unsigned int aom_highbd_avg_4x4_c(const uint8_t *s8, int p) {
  const uint16_t *line = CONVERT_TO_SHORTPTR(s8);
  int total = 0;
  for (int row = 0; row < 4; ++row, line += p) {
    for (int col = 0; col < 4; ++col) {
      total += line[col];
    }
  }

  // 16 samples: add half the divisor for rounding, then divide by 16.
  return (total + 8) >> 4;
}
73 | | |
// Finds the smallest and largest absolute sample difference between two 8x8
// blocks of 16-bit samples.
//   s8, p  : first block (converted with CONVERT_TO_SHORTPTR) and its row
//            stride in samples.
//   d8, dp : second block (converted with CONVERT_TO_SHORTPTR) and its row
//            stride in samples.
//   min/max: receive the smallest / largest |s - d| over the 64 positions.
void aom_highbd_minmax_8x8_c(const uint8_t *s8, int p, const uint8_t *d8,
                             int dp, int *min, int *max) {
  const uint16_t *s = CONVERT_TO_SHORTPTR(s8);
  const uint16_t *d = CONVERT_TO_SHORTPTR(d8);
  // Samples are uint16_t, so a difference can be as large as 65535. Seeding
  // the minimum with the 8-bit limit (255) would clamp the result whenever
  // every difference in the block exceeds 255.
  *min = 65535;
  *max = 0;
  for (int i = 0; i < 8; ++i, s += p, d += dp) {
    for (int j = 0; j < 8; ++j) {
      const int diff = abs(s[j] - d[j]);
      if (diff < *min) *min = diff;
      if (diff > *max) *max = diff;
    }
  }
}
89 | | #endif // CONFIG_AV1_HIGHBITDEPTH |
90 | | |
// 4-point Hadamard butterfly over one column.
//   src_diff  : first of four inputs, spaced src_stride elements apart.
//   src_stride: distance between successive inputs, in elements.
//   coeff     : receives the four outputs contiguously.
// The first butterfly stage is halved (>> 1) before the second stage.
static void hadamard_col4(const int16_t *src_diff, ptrdiff_t src_stride,
                          int16_t *coeff) {
  const int in0 = src_diff[0 * src_stride];
  const int in1 = src_diff[1 * src_stride];
  const int in2 = src_diff[2 * src_stride];
  const int in3 = src_diff[3 * src_stride];

  const int16_t half_sum01 = (in0 + in1) >> 1;
  const int16_t half_dif01 = (in0 - in1) >> 1;
  const int16_t half_sum23 = (in2 + in3) >> 1;
  const int16_t half_dif23 = (in2 - in3) >> 1;

  coeff[0] = half_sum01 + half_sum23;
  coeff[1] = half_dif01 + half_dif23;
  coeff[2] = half_sum01 - half_sum23;
  coeff[3] = half_dif01 - half_dif23;
}
103 | | |
104 | | void aom_hadamard_4x4_c(const int16_t *src_diff, ptrdiff_t src_stride, |
105 | 388k | tran_low_t *coeff) { |
106 | 388k | int idx; |
107 | 388k | int16_t buffer[16]; |
108 | 388k | int16_t buffer2[16]; |
109 | 388k | int16_t *tmp_buf = &buffer[0]; |
110 | 1.94M | for (idx = 0; idx < 4; ++idx) { |
111 | 1.55M | hadamard_col4(src_diff, src_stride, tmp_buf); // src_diff: 9 bit |
112 | | // dynamic range [-255, 255] |
113 | 1.55M | tmp_buf += 4; |
114 | 1.55M | ++src_diff; |
115 | 1.55M | } |
116 | | |
117 | 388k | tmp_buf = &buffer[0]; |
118 | 1.94M | for (idx = 0; idx < 4; ++idx) { |
119 | 1.55M | hadamard_col4(tmp_buf, 4, buffer2 + 4 * idx); // tmp_buf: 12 bit |
120 | | // dynamic range [-2040, 2040] |
121 | | // buffer2: 15 bit |
122 | | // dynamic range [-16320, 16320] |
123 | 1.55M | ++tmp_buf; |
124 | 1.55M | } |
125 | | |
126 | 6.60M | for (idx = 0; idx < 16; ++idx) coeff[idx] = (tran_low_t)buffer2[idx]; |
127 | 388k | } |
128 | | |
// 8-point Hadamard butterfly over one column, carried out in int16_t.
//   src_diff  : first of eight inputs, spaced src_stride elements apart.
//   src_stride: distance between successive inputs, in elements.
//   coeff     : receives the eight outputs contiguously (in a permuted order).
// Used for both passes of the 8x8 transform: the first pass sees 9-bit input
// in [-255, 255], the second pass 12-bit input in [-2040, 2040].
static void hadamard_col8(const int16_t *src_diff, ptrdiff_t src_stride,
                          int16_t *coeff) {
  // Stage 1: sum / difference of adjacent input pairs.
  int16_t stage1[8];
  for (int k = 0; k < 8; k += 2) {
    const int16_t even = src_diff[k * src_stride];
    const int16_t odd = src_diff[(k + 1) * src_stride];
    stage1[k] = even + odd;
    stage1[k + 1] = even - odd;
  }

  // Stage 2: combine pairs within each half (indices 0..3 and 4..7).
  int16_t stage2[8];
  for (int base = 0; base < 8; base += 4) {
    stage2[base + 0] = stage1[base + 0] + stage1[base + 2];
    stage2[base + 1] = stage1[base + 1] + stage1[base + 3];
    stage2[base + 2] = stage1[base + 0] - stage1[base + 2];
    stage2[base + 3] = stage1[base + 1] - stage1[base + 3];
  }

  // Stage 3: combine the two halves; each sum/difference goes to a fixed
  // output slot.
  static const int sum_slot[4] = { 0, 7, 3, 4 };
  static const int dif_slot[4] = { 2, 6, 1, 5 };
  for (int k = 0; k < 4; ++k) {
    coeff[sum_slot[k]] = stage2[k] + stage2[k + 4];
    coeff[dif_slot[k]] = stage2[k] - stage2[k + 4];
  }
}
160 | | |
161 | | // The order of the output coeff of the hadamard is not important. For |
162 | | // optimization purposes the final transpose may be skipped. |
163 | | void aom_hadamard_8x8_c(const int16_t *src_diff, ptrdiff_t src_stride, |
164 | 40.6M | tran_low_t *coeff) { |
165 | 40.6M | int idx; |
166 | 40.6M | int16_t buffer[64]; |
167 | 40.6M | int16_t buffer2[64]; |
168 | 40.6M | int16_t *tmp_buf = &buffer[0]; |
169 | 363M | for (idx = 0; idx < 8; ++idx) { |
170 | 322M | hadamard_col8(src_diff, src_stride, tmp_buf); // src_diff: 9 bit |
171 | | // dynamic range [-255, 255] |
172 | 322M | tmp_buf += 8; |
173 | 322M | ++src_diff; |
174 | 322M | } |
175 | | |
176 | 40.6M | tmp_buf = &buffer[0]; |
177 | 366M | for (idx = 0; idx < 8; ++idx) { |
178 | 325M | hadamard_col8(tmp_buf, 8, buffer2 + 8 * idx); // tmp_buf: 12 bit |
179 | | // dynamic range [-2040, 2040] |
180 | | // buffer2: 15 bit |
181 | | // dynamic range [-16320, 16320] |
182 | 325M | ++tmp_buf; |
183 | 325M | } |
184 | | |
185 | 2.63G | for (idx = 0; idx < 64; ++idx) coeff[idx] = (tran_low_t)buffer2[idx]; |
186 | 40.6M | } |
187 | | |
// 8x8 2D Hadamard transform with 16-bit output coefficients.
//   src_diff  : top-left of the 8x8 input block.
//   src_stride: row stride of src_diff, in elements.
//   coeff     : receives 64 int16_t coefficients.
void aom_hadamard_lp_8x8_c(const int16_t *src_diff, ptrdiff_t src_stride,
                           int16_t *coeff) {
  int16_t first_pass[64];
  int16_t second_pass[64];

  // First pass over each input column. For 9-bit input in [-255, 255] the
  // results are 12 bit, in [-2040, 2040].
  for (int col = 0; col < 8; ++col) {
    hadamard_col8(src_diff + col, src_stride, first_pass + 8 * col);
  }

  // Second pass across the first-pass results (stride 8). Results are 15 bit,
  // in [-16320, 16320].
  for (int col = 0; col < 8; ++col) {
    hadamard_col8(first_pass + col, 8, second_pass + 8 * col);
  }

  for (int k = 0; k < 64; ++k) {
    coeff[k] = second_pass[k];
  }
}
211 | | |
212 | | // In place 16x16 2D Hadamard transform |
213 | | void aom_hadamard_16x16_c(const int16_t *src_diff, ptrdiff_t src_stride, |
214 | 9.53M | tran_low_t *coeff) { |
215 | 9.53M | int idx; |
216 | 47.5M | for (idx = 0; idx < 4; ++idx) { |
217 | | // src_diff: 9 bit, dynamic range [-255, 255] |
218 | 38.0M | const int16_t *src_ptr = |
219 | 38.0M | src_diff + (idx >> 1) * 8 * src_stride + (idx & 0x01) * 8; |
220 | 38.0M | aom_hadamard_8x8_c(src_ptr, src_stride, coeff + idx * 64); |
221 | 38.0M | } |
222 | | |
223 | | // coeff: 15 bit, dynamic range [-16320, 16320] |
224 | 618M | for (idx = 0; idx < 64; ++idx) { |
225 | 609M | tran_low_t a0 = coeff[0]; |
226 | 609M | tran_low_t a1 = coeff[64]; |
227 | 609M | tran_low_t a2 = coeff[128]; |
228 | 609M | tran_low_t a3 = coeff[192]; |
229 | | |
230 | 609M | tran_low_t b0 = (a0 + a1) >> 1; // (a0 + a1): 16 bit, [-32640, 32640] |
231 | 609M | tran_low_t b1 = (a0 - a1) >> 1; // b0-b3: 15 bit, dynamic range |
232 | 609M | tran_low_t b2 = (a2 + a3) >> 1; // [-16320, 16320] |
233 | 609M | tran_low_t b3 = (a2 - a3) >> 1; |
234 | | |
235 | 609M | coeff[0] = b0 + b2; // 16 bit, [-32640, 32640] |
236 | 609M | coeff[64] = b1 + b3; |
237 | 609M | coeff[128] = b0 - b2; |
238 | 609M | coeff[192] = b1 - b3; |
239 | | |
240 | 609M | ++coeff; |
241 | 609M | } |
242 | 9.53M | } |
243 | | |
// 16x16 2D Hadamard transform with 16-bit output coefficients: four
// aom_hadamard_lp_8x8_c transforms followed by an in-place combining
// butterfly on coeff.
//   src_diff  : top-left of the 16x16 input block.
//   src_stride: row stride of src_diff, in elements.
//   coeff     : receives 256 int16_t coefficients.
void aom_hadamard_lp_16x16_c(const int16_t *src_diff, ptrdiff_t src_stride,
                             int16_t *coeff) {
  // Transform the four 8x8 quadrants (raster order); quadrant q fills
  // coeff[64 * q .. 64 * q + 63].
  for (int quad = 0; quad < 4; ++quad) {
    const int block_row = quad >> 1;
    const int block_col = quad & 0x01;
    const int16_t *quad_src =
        src_diff + block_row * 8 * src_stride + block_col * 8;
    aom_hadamard_lp_8x8_c(quad_src, src_stride, coeff + quad * 64);
  }

  // Combine the k-th coefficient of every quadrant. The first stage is halved
  // so that, for 15-bit quadrant coefficients, the outputs stay within
  // [-32640, 32640] and therefore fit in int16_t.
  for (int k = 0; k < 64; ++k) {
    const int16_t q0 = coeff[k];
    const int16_t q1 = coeff[k + 64];
    const int16_t q2 = coeff[k + 128];
    const int16_t q3 = coeff[k + 192];

    const int16_t sum01 = (q0 + q1) >> 1;
    const int16_t dif01 = (q0 - q1) >> 1;
    const int16_t sum23 = (q2 + q3) >> 1;
    const int16_t dif23 = (q2 - q3) >> 1;

    coeff[k] = sum01 + sum23;
    coeff[k + 64] = dif01 + dif23;
    coeff[k + 128] = sum01 - sum23;
    coeff[k + 192] = dif01 - dif23;
  }
}
272 | | |
273 | | void aom_hadamard_32x32_c(const int16_t *src_diff, ptrdiff_t src_stride, |
274 | 1.99M | tran_low_t *coeff) { |
275 | 1.99M | int idx; |
276 | 9.96M | for (idx = 0; idx < 4; ++idx) { |
277 | | // src_diff: 9 bit, dynamic range [-255, 255] |
278 | 7.96M | const int16_t *src_ptr = |
279 | 7.96M | src_diff + (idx >> 1) * 16 * src_stride + (idx & 0x01) * 16; |
280 | 7.96M | aom_hadamard_16x16_c(src_ptr, src_stride, coeff + idx * 256); |
281 | 7.96M | } |
282 | | |
283 | | // coeff: 15 bit, dynamic range [-16320, 16320] |
284 | 511M | for (idx = 0; idx < 256; ++idx) { |
285 | 509M | tran_low_t a0 = coeff[0]; |
286 | 509M | tran_low_t a1 = coeff[256]; |
287 | 509M | tran_low_t a2 = coeff[512]; |
288 | 509M | tran_low_t a3 = coeff[768]; |
289 | | |
290 | 509M | tran_low_t b0 = (a0 + a1) >> 2; // (a0 + a1): 16 bit, [-32640, 32640] |
291 | 509M | tran_low_t b1 = (a0 - a1) >> 2; // b0-b3: 15 bit, dynamic range |
292 | 509M | tran_low_t b2 = (a2 + a3) >> 2; // [-16320, 16320] |
293 | 509M | tran_low_t b3 = (a2 - a3) >> 2; |
294 | | |
295 | 509M | coeff[0] = b0 + b2; // 16 bit, [-32640, 32640] |
296 | 509M | coeff[256] = b1 + b3; |
297 | 509M | coeff[512] = b0 - b2; |
298 | 509M | coeff[768] = b1 - b3; |
299 | | |
300 | 509M | ++coeff; |
301 | 509M | } |
302 | 1.99M | } |
303 | | |
304 | | #if CONFIG_AV1_HIGHBITDEPTH |
// 8-point Hadamard butterfly over one column, carried out in int16_t. Used as
// the first pass of the high-bitdepth 8x8 transform.
//   src_diff  : first of eight inputs, spaced src_stride elements apart.
//   src_stride: distance between successive inputs, in elements.
//   coeff     : receives the eight outputs contiguously (in a permuted order).
static void hadamard_highbd_col8_first_pass(const int16_t *src_diff,
                                            ptrdiff_t src_stride,
                                            int16_t *coeff) {
  // Stage 1: sum / difference of adjacent input pairs.
  int16_t stage1[8];
  for (int k = 0; k < 8; k += 2) {
    const int16_t even = src_diff[k * src_stride];
    const int16_t odd = src_diff[(k + 1) * src_stride];
    stage1[k] = even + odd;
    stage1[k + 1] = even - odd;
  }

  // Stage 2: combine pairs within each half (indices 0..3 and 4..7).
  int16_t stage2[8];
  for (int base = 0; base < 8; base += 4) {
    stage2[base + 0] = stage1[base + 0] + stage1[base + 2];
    stage2[base + 1] = stage1[base + 1] + stage1[base + 3];
    stage2[base + 2] = stage1[base + 0] - stage1[base + 2];
    stage2[base + 3] = stage1[base + 1] - stage1[base + 3];
  }

  // Stage 3: combine the two halves; each sum/difference goes to a fixed
  // output slot.
  static const int sum_slot[4] = { 0, 7, 3, 4 };
  static const int dif_slot[4] = { 2, 6, 1, 5 };
  for (int k = 0; k < 4; ++k) {
    coeff[sum_slot[k]] = stage2[k] + stage2[k + 4];
    coeff[dif_slot[k]] = stage2[k] - stage2[k + 4];
  }
}
335 | | |
// 8-point Hadamard butterfly over one column with 32-bit intermediates and
// outputs. Used as the second pass of the high-bitdepth 8x8 transform.
//   src_diff  : first of eight 16-bit inputs (dynamic range
//               [-32760, 32760]), spaced src_stride elements apart.
//   src_stride: distance between successive inputs, in elements.
//   coeff     : receives the eight 19-bit outputs contiguously (in a permuted
//               order).
static void hadamard_highbd_col8_second_pass(const int16_t *src_diff,
                                             ptrdiff_t src_stride,
                                             int32_t *coeff) {
  // Stage 1: sum / difference of adjacent input pairs.
  int32_t stage1[8];
  for (int k = 0; k < 8; k += 2) {
    const int32_t even = src_diff[k * src_stride];
    const int32_t odd = src_diff[(k + 1) * src_stride];
    stage1[k] = even + odd;
    stage1[k + 1] = even - odd;
  }

  // Stage 2: combine pairs within each half (indices 0..3 and 4..7).
  int32_t stage2[8];
  for (int base = 0; base < 8; base += 4) {
    stage2[base + 0] = stage1[base + 0] + stage1[base + 2];
    stage2[base + 1] = stage1[base + 1] + stage1[base + 3];
    stage2[base + 2] = stage1[base + 0] - stage1[base + 2];
    stage2[base + 3] = stage1[base + 1] - stage1[base + 3];
  }

  // Stage 3: combine the two halves; each sum/difference goes to a fixed
  // output slot.
  static const int sum_slot[4] = { 0, 7, 3, 4 };
  static const int dif_slot[4] = { 2, 6, 1, 5 };
  for (int k = 0; k < 4; ++k) {
    coeff[sum_slot[k]] = stage2[k] + stage2[k + 4];
    coeff[dif_slot[k]] = stage2[k] - stage2[k + 4];
  }
}
368 | | |
369 | | // The order of the output coeff of the hadamard is not important. For |
370 | | // optimization purposes the final transpose may be skipped. |
371 | | void aom_highbd_hadamard_8x8_c(const int16_t *src_diff, ptrdiff_t src_stride, |
372 | 0 | tran_low_t *coeff) { |
373 | 0 | int idx; |
374 | 0 | int16_t buffer[64]; |
375 | 0 | int32_t buffer2[64]; |
376 | 0 | int16_t *tmp_buf = &buffer[0]; |
377 | 0 | for (idx = 0; idx < 8; ++idx) { |
378 | | // src_diff: 13 bit |
379 | | // buffer: 16 bit, dynamic range [-32760, 32760] |
380 | 0 | hadamard_highbd_col8_first_pass(src_diff, src_stride, tmp_buf); |
381 | 0 | tmp_buf += 8; |
382 | 0 | ++src_diff; |
383 | 0 | } |
384 | |
|
385 | 0 | tmp_buf = &buffer[0]; |
386 | 0 | for (idx = 0; idx < 8; ++idx) { |
387 | | // buffer: 16 bit |
388 | | // buffer2: 19 bit, dynamic range [-262080, 262080] |
389 | 0 | hadamard_highbd_col8_second_pass(tmp_buf, 8, buffer2 + 8 * idx); |
390 | 0 | ++tmp_buf; |
391 | 0 | } |
392 | |
|
393 | 0 | for (idx = 0; idx < 64; ++idx) coeff[idx] = (tran_low_t)buffer2[idx]; |
394 | 0 | } |
395 | | |
396 | | // In place 16x16 2D Hadamard transform |
397 | | void aom_highbd_hadamard_16x16_c(const int16_t *src_diff, ptrdiff_t src_stride, |
398 | 0 | tran_low_t *coeff) { |
399 | 0 | int idx; |
400 | 0 | for (idx = 0; idx < 4; ++idx) { |
401 | | // src_diff: 13 bit, dynamic range [-4095, 4095] |
402 | 0 | const int16_t *src_ptr = |
403 | 0 | src_diff + (idx >> 1) * 8 * src_stride + (idx & 0x01) * 8; |
404 | 0 | aom_highbd_hadamard_8x8_c(src_ptr, src_stride, coeff + idx * 64); |
405 | 0 | } |
406 | | |
407 | | // coeff: 19 bit, dynamic range [-262080, 262080] |
408 | 0 | for (idx = 0; idx < 64; ++idx) { |
409 | 0 | tran_low_t a0 = coeff[0]; |
410 | 0 | tran_low_t a1 = coeff[64]; |
411 | 0 | tran_low_t a2 = coeff[128]; |
412 | 0 | tran_low_t a3 = coeff[192]; |
413 | |
|
414 | 0 | tran_low_t b0 = (a0 + a1) >> 1; |
415 | 0 | tran_low_t b1 = (a0 - a1) >> 1; |
416 | 0 | tran_low_t b2 = (a2 + a3) >> 1; |
417 | 0 | tran_low_t b3 = (a2 - a3) >> 1; |
418 | | |
419 | | // new coeff dynamic range: 20 bit |
420 | 0 | coeff[0] = b0 + b2; |
421 | 0 | coeff[64] = b1 + b3; |
422 | 0 | coeff[128] = b0 - b2; |
423 | 0 | coeff[192] = b1 - b3; |
424 | |
|
425 | 0 | ++coeff; |
426 | 0 | } |
427 | 0 | } |
428 | | |
429 | | void aom_highbd_hadamard_32x32_c(const int16_t *src_diff, ptrdiff_t src_stride, |
430 | 0 | tran_low_t *coeff) { |
431 | 0 | int idx; |
432 | 0 | for (idx = 0; idx < 4; ++idx) { |
433 | | // src_diff: 13 bit, dynamic range [-4095, 4095] |
434 | 0 | const int16_t *src_ptr = |
435 | 0 | src_diff + (idx >> 1) * 16 * src_stride + (idx & 0x01) * 16; |
436 | 0 | aom_highbd_hadamard_16x16_c(src_ptr, src_stride, coeff + idx * 256); |
437 | 0 | } |
438 | | |
439 | | // coeff: 20 bit |
440 | 0 | for (idx = 0; idx < 256; ++idx) { |
441 | 0 | tran_low_t a0 = coeff[0]; |
442 | 0 | tran_low_t a1 = coeff[256]; |
443 | 0 | tran_low_t a2 = coeff[512]; |
444 | 0 | tran_low_t a3 = coeff[768]; |
445 | |
|
446 | 0 | tran_low_t b0 = (a0 + a1) >> 2; |
447 | 0 | tran_low_t b1 = (a0 - a1) >> 2; |
448 | 0 | tran_low_t b2 = (a2 + a3) >> 2; |
449 | 0 | tran_low_t b3 = (a2 - a3) >> 2; |
450 | | |
451 | | // new coeff dynamic range: 20 bit |
452 | 0 | coeff[0] = b0 + b2; |
453 | 0 | coeff[256] = b1 + b3; |
454 | 0 | coeff[512] = b0 - b2; |
455 | 0 | coeff[768] = b1 - b3; |
456 | |
|
457 | 0 | ++coeff; |
458 | 0 | } |
459 | 0 | } |
460 | | #endif // CONFIG_AV1_HIGHBITDEPTH |
461 | | |
462 | | // coeff: 16 bits, dynamic range [-32640, 32640]. |
463 | | // length: value range {16, 64, 256, 1024}. |
464 | 9.84M | int aom_satd_c(const tran_low_t *coeff, int length) { |
465 | 9.84M | int i; |
466 | 9.84M | int satd = 0; |
467 | 3.57G | for (i = 0; i < length; ++i) satd += abs(coeff[i]); |
468 | | |
469 | | // satd: 26 bits, dynamic range [-32640 * 1024, 32640 * 1024] |
470 | 9.84M | return satd; |
471 | 9.84M | } |
472 | | |
// Sum of absolute values of `length` 16-bit transform coefficients.
//   coeff : coefficients to accumulate.
//   length: number of coefficients to read.
// For |coeff| <= 32640 and length <= 1024 the result is at most
// 32640 * 1024, i.e. 26 bits.
int aom_satd_lp_c(const int16_t *coeff, int length) {
  int total = 0;
  int k = 0;
  while (k < length) {
    total += abs(coeff[k]);
    ++k;
  }
  return total;
}
480 | | |
// Integer projection onto row vectors: for each of 16 adjacent columns, sums
// `height` samples down the column and normalises the sum by (height / 2).
//   hbuf      : receives the 16 normalised column sums.
//   ref       : top-left sample of the 16-column region.
//   ref_stride: row stride of ref, in samples.
//   height    : number of rows; value range {16, 32, 64, 128}.
void aom_int_pro_row_c(int16_t hbuf[16], const uint8_t *ref,
                       const int ref_stride, const int height) {
  const int norm_factor = height >> 1;
  for (int col = 0; col < 16; ++col) {
    const uint8_t *sample = ref + col;
    int16_t *const out = &hbuf[col];

    // Raw column sum: at most 128 * 255 = 32640, which fits in int16_t.
    *out = 0;
    for (int row = 0; row < height; ++row, sample += ref_stride) {
      *out += *sample;
    }

    // After normalisation the value is at most 2 * 255 = 510.
    *out /= norm_factor;
  }
}
497 | | |
// Sums `width` consecutive 8-bit samples starting at ref.
//   width: number of samples; value range {16, 32, 64, 128}.
// The total is at most 128 * 255 = 32640, which fits in int16_t.
int16_t aom_int_pro_col_c(const uint8_t *ref, const int width) {
  int16_t total = 0;
  int col = 0;
  while (col < width) {
    total += ref[col];
    ++col;
  }
  return total;
}
506 | | |
// Variance-style measure of the difference between two vectors:
//   sum(diff^2) - (sum(diff))^2 / width,   width = 4 << bwl.
//   ref: values in [0, 510].
//   src: values in [0, 510].
//   bwl: {2, 3, 4, 5}, giving width {16, 32, 64, 128}.
// Returns the measure as an int.
int aom_vector_var_c(const int16_t *ref, const int16_t *src, const int bwl) {
  const int width = 4 << bwl;
  int sse = 0;
  int mean = 0;

  for (int i = 0; i < width; ++i) {
    const int diff = ref[i] - src[i];  // diff: dynamic range [-510, 510].
    mean += diff;                      // |mean| <= 510 * 128 = 65280.
    sse += diff * diff;                // sse <= 510^2 * 128, about 25 bits.
  }

  // For width == 128 the mean can reach 65280, and 65280^2 is just under 2^32,
  // which overflows a signed int. Square the magnitude in unsigned arithmetic
  // instead; the sign of the mean does not affect its square.
  const unsigned int mean_abs = (unsigned int)abs(mean);
  // width == 1 << (bwl + 2), so this shift divides by width. The shifted
  // value is below 2^28 and converts to int without loss.
  const int mean_sq_over_width = (int)((mean_abs * mean_abs) >> (bwl + 2));
  return sse - mean_sq_over_width;
}