Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2016, Alliance for Open Media. All rights reserved. |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
10 | | */ |
11 | | |
12 | | #include <assert.h> |
13 | | #include <stdlib.h> |
14 | | |
15 | | #include "config/aom_dsp_rtcd.h" |
16 | | #include "aom_ports/mem.h" |
17 | | |
// Finds the smallest and largest absolute sample difference between two 8x8
// blocks of 8-bit pixels.
//   s, p    : first block and its row stride.
//   d, dp   : second block and its row stride.
//   min, max: outputs; receive the minimum / maximum of |s - d| over the
//             64 positions.
void aom_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp,
                      int *min, int *max) {
  // Seed with the extremes of the 8-bit difference range so that the first
  // sample always tightens both bounds.
  *min = 255;
  *max = 0;
  for (int row = 0; row < 8; ++row) {
    const uint8_t *const src_row = s + row * p;
    const uint8_t *const ref_row = d + row * dp;
    for (int col = 0; col < 8; ++col) {
      const int abs_diff = abs(src_row[col] - ref_row[col]);
      if (abs_diff < *min) *min = abs_diff;
      if (abs_diff > *max) *max = abs_diff;
    }
  }
}
31 | | |
// Returns the rounded mean of a 4x4 block of 8-bit pixels.
//   s: top-left sample of the block.
//   p: row stride of s.
unsigned int aom_avg_4x4_c(const uint8_t *s, int p) {
  int sum = 0;
  for (int i = 0; i < 4; ++i, s += p) {
    for (int j = 0; j < 4; ++j) sum += s[j];
  }

  // 16 samples: add half the divisor before the shift to round to nearest.
  return (sum + 8) >> 4;
}
41 | | |
// Returns the rounded mean of an 8x8 block of 8-bit pixels.
//   s: top-left sample of the block.
//   p: row stride of s.
unsigned int aom_avg_8x8_c(const uint8_t *s, int p) {
  int sum = 0;
  for (int i = 0; i < 8; ++i, s += p) {
    for (int j = 0; j < 8; ++j) sum += s[j];
  }

  // 64 samples: add half the divisor before the shift to round to nearest.
  return (sum + 32) >> 6;
}
51 | | |
// Writes to avg[0..3] the 8x8 averages of the four quadrants of the 16x16
// area whose top-left corner is at column x16_idx, row y16_idx of s (row
// stride p). Quadrants are visited in raster order: top-left, top-right,
// bottom-left, bottom-right.
void aom_avg_8x8_quad_c(const uint8_t *s, int p, int x16_idx, int y16_idx,
                        int *avg) {
  for (int quad = 0; quad < 4; ++quad) {
    // Bit 0 of quad selects the right half, bit 1 the bottom half.
    const int col = x16_idx + ((quad & 1) << 3);
    const int row = y16_idx + ((quad >> 1) << 3);
    avg[quad] = aom_avg_8x8_c(s + row * p + col, p);
  }
}
61 | | |
62 | | #if CONFIG_AV1_HIGHBITDEPTH |
// Returns the rounded mean of an 8x8 block of 16-bit samples.
//   s8: block pointer in its uint8_t-typed form; it is turned into a
//       uint16_t sample pointer with CONVERT_TO_SHORTPTR before use.
//   p : row stride, in 16-bit samples.
unsigned int aom_highbd_avg_8x8_c(const uint8_t *s8, int p) {
  const uint16_t *s = CONVERT_TO_SHORTPTR(s8);
  int sum = 0;
  for (int i = 0; i < 8; ++i, s += p) {
    for (int j = 0; j < 8; ++j) sum += s[j];
  }

  // 64 samples: add half the divisor before the shift to round to nearest.
  return (sum + 32) >> 6;
}
73 | | |
// Returns the rounded mean of a 4x4 block of 16-bit samples.
//   s8: block pointer in its uint8_t-typed form; it is turned into a
//       uint16_t sample pointer with CONVERT_TO_SHORTPTR before use.
//   p : row stride, in 16-bit samples.
unsigned int aom_highbd_avg_4x4_c(const uint8_t *s8, int p) {
  const uint16_t *s = CONVERT_TO_SHORTPTR(s8);
  int sum = 0;
  for (int i = 0; i < 4; ++i, s += p) {
    for (int j = 0; j < 4; ++j) sum += s[j];
  }

  // 16 samples: add half the divisor before the shift to round to nearest.
  return (sum + 8) >> 4;
}
84 | | |
// Finds the smallest and largest absolute sample difference between two 8x8
// blocks of 16-bit samples.
//   s8, p   : first block (uint8_t-typed pointer, converted with
//             CONVERT_TO_SHORTPTR) and its row stride in 16-bit samples.
//   d8, dp  : second block and its row stride, same conventions.
//   min, max: outputs; receive the minimum / maximum of |s - d|.
void aom_highbd_minmax_8x8_c(const uint8_t *s8, int p, const uint8_t *d8,
                             int dp, int *min, int *max) {
  const uint16_t *const src = CONVERT_TO_SHORTPTR(s8);
  const uint16_t *const ref = CONVERT_TO_SHORTPTR(d8);
  // Seed with the extremes of the 16-bit difference range.
  *min = 65535;
  *max = 0;
  for (int row = 0; row < 8; ++row) {
    const uint16_t *const src_row = src + row * p;
    const uint16_t *const ref_row = ref + row * dp;
    for (int col = 0; col < 8; ++col) {
      const int abs_diff = abs(src_row[col] - ref_row[col]);
      if (abs_diff < *min) *min = abs_diff;
      if (abs_diff > *max) *max = abs_diff;
    }
  }
}
100 | | #endif // CONFIG_AV1_HIGHBITDEPTH |
101 | | |
// 4-point Hadamard butterfly over one strided column.
// Reads src_diff[k * src_stride] for k = 0..3 and writes four results to
// coeff[0..3]. The first butterfly stage halves its sums and differences
// (>> 1) before they are combined.
static void hadamard_col4(const int16_t *src_diff, ptrdiff_t src_stride,
                          int16_t *coeff) {
  const int16_t in0 = src_diff[0 * src_stride];
  const int16_t in1 = src_diff[1 * src_stride];
  const int16_t in2 = src_diff[2 * src_stride];
  const int16_t in3 = src_diff[3 * src_stride];

  // Stage 1: halved sum / difference of each adjacent pair.
  const int16_t half_sum01 = (int16_t)((in0 + in1) >> 1);
  const int16_t half_dif01 = (int16_t)((in0 - in1) >> 1);
  const int16_t half_sum23 = (int16_t)((in2 + in3) >> 1);
  const int16_t half_dif23 = (int16_t)((in2 - in3) >> 1);

  // Stage 2: combine the two pairs.
  coeff[0] = (int16_t)(half_sum01 + half_sum23);
  coeff[1] = (int16_t)(half_dif01 + half_dif23);
  coeff[2] = (int16_t)(half_sum01 - half_sum23);
  coeff[3] = (int16_t)(half_dif01 - half_dif23);
}
114 | | |
115 | | void aom_hadamard_4x4_c(const int16_t *src_diff, ptrdiff_t src_stride, |
116 | 0 | tran_low_t *coeff) { |
117 | 0 | int idx; |
118 | 0 | int16_t buffer[16]; |
119 | 0 | int16_t buffer2[16]; |
120 | 0 | int16_t *tmp_buf = &buffer[0]; |
121 | 0 | for (idx = 0; idx < 4; ++idx) { |
122 | 0 | hadamard_col4(src_diff, src_stride, tmp_buf); // src_diff: 9 bit |
123 | | // dynamic range [-255, 255] |
124 | 0 | tmp_buf += 4; |
125 | 0 | ++src_diff; |
126 | 0 | } |
127 | |
|
128 | 0 | tmp_buf = &buffer[0]; |
129 | 0 | for (idx = 0; idx < 4; ++idx) { |
130 | 0 | hadamard_col4(tmp_buf, 4, buffer2 + 4 * idx); // tmp_buf: 12 bit |
131 | | // dynamic range [-2040, 2040] |
132 | | // buffer2: 15 bit |
133 | | // dynamic range [-16320, 16320] |
134 | 0 | ++tmp_buf; |
135 | 0 | } |
136 | | |
137 | | // Extra transpose to match SSE2 behavior(i.e., aom_hadamard_4x4_sse2). |
138 | 0 | for (int i = 0; i < 4; i++) { |
139 | 0 | for (int j = 0; j < 4; j++) { |
140 | 0 | coeff[i * 4 + j] = (tran_low_t)buffer2[j * 4 + i]; |
141 | 0 | } |
142 | 0 | } |
143 | 0 | } |
144 | | |
// 8-point Hadamard butterfly over one strided column.
// Reads src_diff[k * src_stride] for k = 0..7 and writes eight results to
// coeff[0..7]; every intermediate value is stored as int16_t.
// src_diff: first pass, 9 bit, dynamic range [-255, 255]
//           second pass, 12 bit, dynamic range [-2040, 2040]
static void hadamard_col8(const int16_t *src_diff, ptrdiff_t src_stride,
                          int16_t *coeff) {
  // Output slots for the final-stage sums and differences, in the order the
  // stage-2 values are consumed.
  static const int kSumSlot[4] = { 0, 7, 3, 4 };
  static const int kDiffSlot[4] = { 2, 6, 1, 5 };
  int16_t stage1[8];
  int16_t stage2[8];

  // Stage 1: sum / difference of each adjacent input pair.
  for (int k = 0; k < 4; ++k) {
    const int16_t even = src_diff[(2 * k) * src_stride];
    const int16_t odd = src_diff[(2 * k + 1) * src_stride];
    stage1[2 * k] = (int16_t)(even + odd);
    stage1[2 * k + 1] = (int16_t)(even - odd);
  }

  // Stage 2: butterflies inside each group of four.
  for (int g = 0; g < 8; g += 4) {
    stage2[g + 0] = (int16_t)(stage1[g + 0] + stage1[g + 2]);
    stage2[g + 1] = (int16_t)(stage1[g + 1] + stage1[g + 3]);
    stage2[g + 2] = (int16_t)(stage1[g + 0] - stage1[g + 2]);
    stage2[g + 3] = (int16_t)(stage1[g + 1] - stage1[g + 3]);
  }

  // Stage 3: combine the two groups and scatter to the output order.
  for (int k = 0; k < 4; ++k) {
    coeff[kSumSlot[k]] = (int16_t)(stage2[k] + stage2[k + 4]);
    coeff[kDiffSlot[k]] = (int16_t)(stage2[k] - stage2[k + 4]);
  }
}
176 | | |
177 | | void aom_hadamard_8x8_c(const int16_t *src_diff, ptrdiff_t src_stride, |
178 | 0 | tran_low_t *coeff) { |
179 | 0 | int idx; |
180 | 0 | int16_t buffer[64]; |
181 | 0 | int16_t buffer2[64]; |
182 | 0 | int16_t *tmp_buf = &buffer[0]; |
183 | 0 | for (idx = 0; idx < 8; ++idx) { |
184 | 0 | hadamard_col8(src_diff, src_stride, tmp_buf); // src_diff: 9 bit |
185 | | // dynamic range [-255, 255] |
186 | 0 | tmp_buf += 8; |
187 | 0 | ++src_diff; |
188 | 0 | } |
189 | |
|
190 | 0 | tmp_buf = &buffer[0]; |
191 | 0 | for (idx = 0; idx < 8; ++idx) { |
192 | 0 | hadamard_col8(tmp_buf, 8, buffer2 + 8 * idx); // tmp_buf: 12 bit |
193 | | // dynamic range [-2040, 2040] |
194 | | // buffer2: 15 bit |
195 | | // dynamic range [-16320, 16320] |
196 | 0 | ++tmp_buf; |
197 | 0 | } |
198 | | |
199 | | // Extra transpose to match SSE2 behavior(i.e., aom_hadamard_8x8_sse2). |
200 | 0 | for (int i = 0; i < 8; i++) { |
201 | 0 | for (int j = 0; j < 8; j++) { |
202 | 0 | coeff[i * 8 + j] = (tran_low_t)buffer2[j * 8 + i]; |
203 | 0 | } |
204 | 0 | } |
205 | 0 | } |
206 | | |
// Low-precision 8x8 2D Hadamard transform: same computation as the
// tran_low_t variant, but the 64 coefficients are written as int16_t.
//   src_diff  : top-left of the 8x8 input block.
//   src_stride: row stride of src_diff.
//   coeff     : receives 64 coefficients.
void aom_hadamard_lp_8x8_c(const int16_t *src_diff, ptrdiff_t src_stride,
                           int16_t *coeff) {
  int16_t buffer[64];
  int16_t buffer2[64];

  // First pass over the input columns.
  // src_diff: 9 bit, dynamic range [-255, 255]
  int16_t *tmp_buf = &buffer[0];
  for (int idx = 0; idx < 8; ++idx) {
    hadamard_col8(src_diff, src_stride, tmp_buf);
    tmp_buf += 8;
    ++src_diff;
  }

  // Second pass across the first-pass results (stride 8).
  // tmp_buf: 12 bit, dynamic range [-2040, 2040]
  // buffer2: 15 bit, dynamic range [-16320, 16320]
  tmp_buf = &buffer[0];
  for (int idx = 0; idx < 8; ++idx) {
    hadamard_col8(tmp_buf, 8, buffer2 + 8 * idx);
    ++tmp_buf;
  }

  // Extra transpose to match SSE2 behavior (i.e., aom_hadamard_lp_8x8_sse2).
  // This writes every one of the 64 outputs, so no separate copy is needed.
  for (int i = 0; i < 8; i++) {
    for (int j = 0; j < 8; j++) {
      coeff[i * 8 + j] = buffer2[j * 8 + i];
    }
  }
}
237 | | |
// Runs the low-precision 8x8 Hadamard transform on two horizontally adjacent
// 8x8 blocks. The block at src_diff fills coeff[0..63]; the block starting 8
// columns to its right fills coeff[64..127].
void aom_hadamard_lp_8x8_dual_c(const int16_t *src_diff, ptrdiff_t src_stride,
                                int16_t *coeff) {
  aom_hadamard_lp_8x8_c(src_diff, src_stride, coeff);
  aom_hadamard_lp_8x8_c(src_diff + 8, src_stride, coeff + 64);
}
245 | | |
246 | | // In place 16x16 2D Hadamard transform |
247 | | void aom_hadamard_16x16_c(const int16_t *src_diff, ptrdiff_t src_stride, |
248 | 0 | tran_low_t *coeff) { |
249 | 0 | int idx; |
250 | 0 | for (idx = 0; idx < 4; ++idx) { |
251 | | // src_diff: 9 bit, dynamic range [-255, 255] |
252 | 0 | const int16_t *src_ptr = |
253 | 0 | src_diff + (idx >> 1) * 8 * src_stride + (idx & 0x01) * 8; |
254 | 0 | aom_hadamard_8x8_c(src_ptr, src_stride, coeff + idx * 64); |
255 | 0 | } |
256 | | |
257 | | // coeff: 15 bit, dynamic range [-16320, 16320] |
258 | 0 | for (idx = 0; idx < 64; ++idx) { |
259 | 0 | tran_low_t a0 = coeff[0]; |
260 | 0 | tran_low_t a1 = coeff[64]; |
261 | 0 | tran_low_t a2 = coeff[128]; |
262 | 0 | tran_low_t a3 = coeff[192]; |
263 | |
|
264 | 0 | tran_low_t b0 = (a0 + a1) >> 1; // (a0 + a1): 16 bit, [-32640, 32640] |
265 | 0 | tran_low_t b1 = (a0 - a1) >> 1; // b0-b3: 15 bit, dynamic range |
266 | 0 | tran_low_t b2 = (a2 + a3) >> 1; // [-16320, 16320] |
267 | 0 | tran_low_t b3 = (a2 - a3) >> 1; |
268 | |
|
269 | 0 | coeff[0] = b0 + b2; // 16 bit, [-32640, 32640] |
270 | 0 | coeff[64] = b1 + b3; |
271 | 0 | coeff[128] = b0 - b2; |
272 | 0 | coeff[192] = b1 - b3; |
273 | |
|
274 | 0 | ++coeff; |
275 | 0 | } |
276 | |
|
277 | 0 | coeff -= 64; |
278 | | // Extra shift to match AVX2 output (i.e., aom_hadamard_16x16_avx2). |
279 | | // Note that to match SSE2 output, it does not need this step. |
280 | 0 | for (int i = 0; i < 16; i++) { |
281 | 0 | for (int j = 0; j < 4; j++) { |
282 | 0 | tran_low_t temp = coeff[i * 16 + 4 + j]; |
283 | 0 | coeff[i * 16 + 4 + j] = coeff[i * 16 + 8 + j]; |
284 | 0 | coeff[i * 16 + 8 + j] = temp; |
285 | 0 | } |
286 | 0 | } |
287 | 0 | } |
288 | | |
// Low-precision 16x16 2D Hadamard transform with int16_t coefficients. The
// four 8x8 quadrants are transformed into consecutive 64-entry slices of
// coeff, then combined in place inside coeff.
void aom_hadamard_lp_16x16_c(const int16_t *src_diff, ptrdiff_t src_stride,
                             int16_t *coeff) {
  // Stage 1: one 8x8 transform per quadrant, in raster order.
  // src_diff: 9 bit, dynamic range [-255, 255]
  for (int quad = 0; quad < 4; ++quad) {
    const int16_t *quad_src =
        src_diff + (quad >> 1) * 8 * src_stride + (quad & 0x01) * 8;
    aom_hadamard_lp_8x8_c(quad_src, src_stride, coeff + quad * 64);
  }

  // Stage 2: 4-point butterfly across the quadrants at each position.
  for (int k = 0; k < 64; ++k) {
    const int16_t a0 = coeff[k];
    const int16_t a1 = coeff[k + 64];
    const int16_t a2 = coeff[k + 128];
    const int16_t a3 = coeff[k + 192];

    // (a0 + a1): 16 bit, [-32640, 32640]
    // b0-b3: 15 bit, dynamic range [-16320, 16320]
    const int16_t b0 = (int16_t)((a0 + a1) >> 1);
    const int16_t b1 = (int16_t)((a0 - a1) >> 1);
    const int16_t b2 = (int16_t)((a2 + a3) >> 1);
    const int16_t b3 = (int16_t)((a2 - a3) >> 1);

    // 16 bit, [-32640, 32640]
    coeff[k] = (int16_t)(b0 + b2);
    coeff[k + 64] = (int16_t)(b1 + b3);
    coeff[k + 128] = (int16_t)(b0 - b2);
    coeff[k + 192] = (int16_t)(b1 - b3);
  }
}
317 | | |
318 | | void aom_hadamard_32x32_c(const int16_t *src_diff, ptrdiff_t src_stride, |
319 | 0 | tran_low_t *coeff) { |
320 | 0 | int idx; |
321 | 0 | for (idx = 0; idx < 4; ++idx) { |
322 | | // src_diff: 9 bit, dynamic range [-255, 255] |
323 | 0 | const int16_t *src_ptr = |
324 | 0 | src_diff + (idx >> 1) * 16 * src_stride + (idx & 0x01) * 16; |
325 | 0 | aom_hadamard_16x16_c(src_ptr, src_stride, coeff + idx * 256); |
326 | 0 | } |
327 | | |
328 | | // coeff: 16 bit, dynamic range [-32768, 32767] |
329 | 0 | for (idx = 0; idx < 256; ++idx) { |
330 | 0 | tran_low_t a0 = coeff[0]; |
331 | 0 | tran_low_t a1 = coeff[256]; |
332 | 0 | tran_low_t a2 = coeff[512]; |
333 | 0 | tran_low_t a3 = coeff[768]; |
334 | |
|
335 | 0 | tran_low_t b0 = (a0 + a1) >> 2; // (a0 + a1): 17 bit, [-65536, 65535] |
336 | 0 | tran_low_t b1 = (a0 - a1) >> 2; // b0-b3: 15 bit, dynamic range |
337 | 0 | tran_low_t b2 = (a2 + a3) >> 2; // [-16384, 16383] |
338 | 0 | tran_low_t b3 = (a2 - a3) >> 2; |
339 | |
|
340 | 0 | coeff[0] = b0 + b2; // 16 bit, [-32768, 32767] |
341 | 0 | coeff[256] = b1 + b3; |
342 | 0 | coeff[512] = b0 - b2; |
343 | 0 | coeff[768] = b1 - b3; |
344 | |
|
345 | 0 | ++coeff; |
346 | 0 | } |
347 | 0 | } |
348 | | |
349 | | #if CONFIG_AV1_HIGHBITDEPTH |
// First-pass 8-point Hadamard butterfly for the high-bitdepth path.
// Reads src_diff[k * src_stride] for k = 0..7 and writes eight int16_t
// results to coeff[0..7]; every intermediate value is stored as int16_t.
static void hadamard_highbd_col8_first_pass(const int16_t *src_diff,
                                            ptrdiff_t src_stride,
                                            int16_t *coeff) {
  int16_t v[8];
  for (int k = 0; k < 8; ++k) v[k] = src_diff[k * src_stride];

  // Stage 1: butterflies on (0,1), (2,3), (4,5), (6,7).
  for (int k = 0; k < 8; k += 2) {
    const int16_t lo = v[k];
    const int16_t hi = v[k + 1];
    v[k] = (int16_t)(lo + hi);
    v[k + 1] = (int16_t)(lo - hi);
  }

  // Stage 2: butterflies on (0,2), (1,3), (4,6), (5,7).
  for (int g = 0; g < 8; g += 4) {
    for (int k = g; k < g + 2; ++k) {
      const int16_t lo = v[k];
      const int16_t hi = v[k + 2];
      v[k] = (int16_t)(lo + hi);
      v[k + 2] = (int16_t)(lo - hi);
    }
  }

  // Stage 3: butterflies on (k, k + 4), written in the output order.
  coeff[0] = (int16_t)(v[0] + v[4]);
  coeff[1] = (int16_t)(v[2] - v[6]);
  coeff[2] = (int16_t)(v[0] - v[4]);
  coeff[3] = (int16_t)(v[2] + v[6]);
  coeff[4] = (int16_t)(v[3] + v[7]);
  coeff[5] = (int16_t)(v[3] - v[7]);
  coeff[6] = (int16_t)(v[1] - v[5]);
  coeff[7] = (int16_t)(v[1] + v[5]);
}
380 | | |
// Second-pass 8-point Hadamard butterfly for the high-bitdepth path.
// Reads src_diff[k * src_stride] for k = 0..7 and writes eight int32_t
// results to coeff[0..7]; intermediates are kept in 32 bits.
// src_diff: 16 bit, dynamic range [-32760, 32760]
// coeff: 19 bit
static void hadamard_highbd_col8_second_pass(const int16_t *src_diff,
                                             ptrdiff_t src_stride,
                                             int32_t *coeff) {
  int32_t x[8];
  for (int k = 0; k < 8; ++k) x[k] = src_diff[k * src_stride];

  // Stage 1: sum / difference of each adjacent pair.
  const int32_t s01 = x[0] + x[1], d01 = x[0] - x[1];
  const int32_t s23 = x[2] + x[3], d23 = x[2] - x[3];
  const int32_t s45 = x[4] + x[5], d45 = x[4] - x[5];
  const int32_t s67 = x[6] + x[7], d67 = x[6] - x[7];

  // Stage 2: butterflies inside each group of four.
  const int32_t lo_ss = s01 + s23, lo_sd = s01 - s23;
  const int32_t lo_ds = d01 + d23, lo_dd = d01 - d23;
  const int32_t hi_ss = s45 + s67, hi_sd = s45 - s67;
  const int32_t hi_ds = d45 + d67, hi_dd = d45 - d67;

  // Stage 3: combine the two groups, written in the output order.
  coeff[0] = lo_ss + hi_ss;
  coeff[1] = lo_sd - hi_sd;
  coeff[2] = lo_ss - hi_ss;
  coeff[3] = lo_sd + hi_sd;
  coeff[4] = lo_dd + hi_dd;
  coeff[5] = lo_dd - hi_dd;
  coeff[6] = lo_ds - hi_ds;
  coeff[7] = lo_ds + hi_ds;
}
413 | | |
414 | | // The order of the output coeff of the hadamard is not important. For |
415 | | // optimization purposes the final transpose may be skipped. |
416 | | void aom_highbd_hadamard_8x8_c(const int16_t *src_diff, ptrdiff_t src_stride, |
417 | 0 | tran_low_t *coeff) { |
418 | 0 | int idx; |
419 | 0 | int16_t buffer[64]; |
420 | 0 | int32_t buffer2[64]; |
421 | 0 | int16_t *tmp_buf = &buffer[0]; |
422 | 0 | for (idx = 0; idx < 8; ++idx) { |
423 | | // src_diff: 13 bit |
424 | | // buffer: 16 bit, dynamic range [-32760, 32760] |
425 | 0 | hadamard_highbd_col8_first_pass(src_diff, src_stride, tmp_buf); |
426 | 0 | tmp_buf += 8; |
427 | 0 | ++src_diff; |
428 | 0 | } |
429 | |
|
430 | 0 | tmp_buf = &buffer[0]; |
431 | 0 | for (idx = 0; idx < 8; ++idx) { |
432 | | // buffer: 16 bit |
433 | | // buffer2: 19 bit, dynamic range [-262080, 262080] |
434 | 0 | hadamard_highbd_col8_second_pass(tmp_buf, 8, buffer2 + 8 * idx); |
435 | 0 | ++tmp_buf; |
436 | 0 | } |
437 | |
|
438 | 0 | for (idx = 0; idx < 64; ++idx) coeff[idx] = (tran_low_t)buffer2[idx]; |
439 | 0 | } |
440 | | |
441 | | // In place 16x16 2D Hadamard transform |
442 | | void aom_highbd_hadamard_16x16_c(const int16_t *src_diff, ptrdiff_t src_stride, |
443 | 0 | tran_low_t *coeff) { |
444 | 0 | int idx; |
445 | 0 | for (idx = 0; idx < 4; ++idx) { |
446 | | // src_diff: 13 bit, dynamic range [-4095, 4095] |
447 | 0 | const int16_t *src_ptr = |
448 | 0 | src_diff + (idx >> 1) * 8 * src_stride + (idx & 0x01) * 8; |
449 | 0 | aom_highbd_hadamard_8x8_c(src_ptr, src_stride, coeff + idx * 64); |
450 | 0 | } |
451 | | |
452 | | // coeff: 19 bit, dynamic range [-262080, 262080] |
453 | 0 | for (idx = 0; idx < 64; ++idx) { |
454 | 0 | tran_low_t a0 = coeff[0]; |
455 | 0 | tran_low_t a1 = coeff[64]; |
456 | 0 | tran_low_t a2 = coeff[128]; |
457 | 0 | tran_low_t a3 = coeff[192]; |
458 | |
|
459 | 0 | tran_low_t b0 = (a0 + a1) >> 1; |
460 | 0 | tran_low_t b1 = (a0 - a1) >> 1; |
461 | 0 | tran_low_t b2 = (a2 + a3) >> 1; |
462 | 0 | tran_low_t b3 = (a2 - a3) >> 1; |
463 | | |
464 | | // new coeff dynamic range: 20 bit |
465 | 0 | coeff[0] = b0 + b2; |
466 | 0 | coeff[64] = b1 + b3; |
467 | 0 | coeff[128] = b0 - b2; |
468 | 0 | coeff[192] = b1 - b3; |
469 | |
|
470 | 0 | ++coeff; |
471 | 0 | } |
472 | 0 | } |
473 | | |
474 | | void aom_highbd_hadamard_32x32_c(const int16_t *src_diff, ptrdiff_t src_stride, |
475 | 0 | tran_low_t *coeff) { |
476 | 0 | int idx; |
477 | 0 | for (idx = 0; idx < 4; ++idx) { |
478 | | // src_diff: 13 bit, dynamic range [-4095, 4095] |
479 | 0 | const int16_t *src_ptr = |
480 | 0 | src_diff + (idx >> 1) * 16 * src_stride + (idx & 0x01) * 16; |
481 | 0 | aom_highbd_hadamard_16x16_c(src_ptr, src_stride, coeff + idx * 256); |
482 | 0 | } |
483 | | |
484 | | // coeff: 20 bit |
485 | 0 | for (idx = 0; idx < 256; ++idx) { |
486 | 0 | tran_low_t a0 = coeff[0]; |
487 | 0 | tran_low_t a1 = coeff[256]; |
488 | 0 | tran_low_t a2 = coeff[512]; |
489 | 0 | tran_low_t a3 = coeff[768]; |
490 | |
|
491 | 0 | tran_low_t b0 = (a0 + a1) >> 2; |
492 | 0 | tran_low_t b1 = (a0 - a1) >> 2; |
493 | 0 | tran_low_t b2 = (a2 + a3) >> 2; |
494 | 0 | tran_low_t b3 = (a2 - a3) >> 2; |
495 | | |
496 | | // new coeff dynamic range: 20 bit |
497 | 0 | coeff[0] = b0 + b2; |
498 | 0 | coeff[256] = b1 + b3; |
499 | 0 | coeff[512] = b0 - b2; |
500 | 0 | coeff[768] = b1 - b3; |
501 | |
|
502 | 0 | ++coeff; |
503 | 0 | } |
504 | 0 | } |
505 | | #endif // CONFIG_AV1_HIGHBITDEPTH |
506 | | |
507 | | // coeff: 20 bits, dynamic range [-524287, 524287]. |
508 | | // length: value range {16, 32, 64, 128, 256, 512, 1024}. |
509 | 0 | int aom_satd_c(const tran_low_t *coeff, int length) { |
510 | 0 | int i; |
511 | 0 | int satd = 0; |
512 | 0 | for (i = 0; i < length; ++i) satd += abs(coeff[i]); |
513 | | |
514 | | // satd: 30 bits, dynamic range [-524287 * 1024, 524287 * 1024] |
515 | 0 | return satd; |
516 | 0 | } |
517 | | |
// Returns the sum of absolute values of the first `length` int16_t
// coefficients.
int aom_satd_lp_c(const int16_t *coeff, int length) {
  int total = 0;
  const int16_t *const end = coeff + length;
  for (const int16_t *cur = coeff; cur < end; ++cur) {
    total += abs(*cur);
  }

  // total: 26 bits, dynamic range [-32640 * 1024, 32640 * 1024]
  return total;
}
525 | | |
// Integer projection onto row vectors: for each of the `width` columns of
// ref, sums `height` samples down the column, shifts the sum right by
// norm_factor and stores it in hbuf[column].
// height: value range {16, 32, 64, 128}.
void aom_int_pro_row_c(int16_t *hbuf, const uint8_t *ref, const int ref_stride,
                       const int width, const int height, int norm_factor) {
  assert(height >= 2);
  for (int col = 0; col < width; ++col) {
    // column_sum: 14 bit, dynamic range [0, 32640].
    int16_t column_sum = 0;
    for (int row = 0; row < height; ++row) {
      column_sum += ref[col + row * ref_stride];
    }
    // hbuf[col]: 9 bit, dynamic range [0, 1020].
    hbuf[col] = (int16_t)(column_sum >> norm_factor);
  }
}
540 | | |
// Integer projection onto column vectors: for each of the `height` rows of
// ref, sums `width` samples along the row, shifts the sum right by
// norm_factor and stores it in vbuf[row].
// width: value range {16, 32, 64, 128}.
void aom_int_pro_col_c(int16_t *vbuf, const uint8_t *ref, const int ref_stride,
                       const int width, const int height, int norm_factor) {
  for (int row = 0; row < height; ++row) {
    const uint8_t *const line = ref + row * ref_stride;
    // row_sum: 14 bit, dynamic range [0, 32640]
    int16_t row_sum = 0;
    for (int col = 0; col < width; ++col) {
      row_sum += line[col];
    }
    vbuf[row] = (int16_t)(row_sum >> norm_factor);
  }
}
552 | | |
// Variance-style measure of the element-wise difference of two vectors of
// length 4 << bwl: the sum of squared differences minus the squared sum of
// differences divided by the length (as a right shift by bwl + 2).
// ref: [0 - 510]
// src: [0 - 510]
// bwl: {2, 3, 4, 5}
int aom_vector_var_c(const int16_t *ref, const int16_t *src, int bwl) {
  const int length = 4 << bwl;
  int diff_sum = 0;     // dynamic range 16 bits.
  int diff_sq_sum = 0;  // dynamic range 26 bits.

  for (int k = 0; k < length; ++k) {
    const int d = ref[k] - src[k];  // dynamic range [-510, 510], 10 bits.
    diff_sum += d;
    diff_sq_sum += d * d;
  }

  // If length == 128, the sum can be 510 * 128 = 65280, and
  // log2(65280 ** 2) ~= 31.99, so the square is computed in unsigned int.
  const unsigned int sum_mag = abs(diff_sum);
  const unsigned int mean_term = (sum_mag * sum_mag) >> (bwl + 2);
  return (int)(diff_sq_sum - mean_term);
}