/src/libhevc/encoder/ihevce_chroma_had_satd.c
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2018 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | /*! |
21 | | ****************************************************************************** |
22 | | * \file ihevce_chroma_had_satd.c |
23 | | * |
24 | | * \brief |
25 | | * This file contains function definitions of chroma HAD SATD functions |
26 | | * |
27 | | * \date |
28 | | * 15/07/2013 |
29 | | * |
30 | | * \author |
31 | | * Ittiam |
32 | | * |
33 | | * List of Functions |
34 | | * ihevce_chroma_HAD_4x4_8bit() |
35 | | * ihevce_chroma_compute_AC_HAD_4x4_8bit() |
36 | | * ihevce_hbd_chroma_HAD_4x4() |
37 | | * ihevce_hbd_chroma_compute_AC_HAD_4x4() |
38 | | * ihevce_chroma_HAD_8x8_8bit() |
39 | | * ihevce_hbd_chroma_HAD_8x8() |
40 | | * ihevce_chroma_HAD_16x16_8bit() |
41 | | * ihevce_hbd_chroma_HAD_16x16() |
42 | | * |
43 | | ****************************************************************************** |
44 | | */ |
45 | | |
46 | | /*****************************************************************************/ |
47 | | /* File Includes */ |
48 | | /*****************************************************************************/ |
49 | | /* System include files */ |
50 | | #include <stdio.h> |
51 | | #include <string.h> |
52 | | #include <stdlib.h> |
53 | | #include <assert.h> |
54 | | #include <stdarg.h> |
55 | | #include <math.h> |
56 | | |
57 | | /* User include files */ |
58 | | #include "ihevc_typedefs.h" |
59 | | #include "ihevc_debug.h" |
60 | | #include "itt_video_api.h" |
61 | | |
62 | | #include "ihevce_api.h" |
63 | | #include "ihevce_defs.h" |
64 | | #include "ihevce_had_satd.h" |
65 | | |
66 | | /*****************************************************************************/ |
67 | | /* Function Definitions */ |
68 | | /*****************************************************************************/ |
69 | | |
70 | | /** |
71 | | ******************************************************************************* |
72 | | * |
73 | | * @brief |
74 | | * Chroma Hadamard Transform of 4x4 block (8-bit input) |
75 | | * |
76 | | * @par Description: |
77 | | * |
78 | | * @param[in] pu1_origin |
79 | | * UWORD8 pointer to the source block (u or v, interleaved) |
80 | | * |
81 | | * @param[in] src_strd |
82 | | * WORD32 Source stride |
83 | | * |
84 | | * @param[in] pu1_pred_buf |
85 | | * UWORD8 pointer to the prediction block (u or v, interleaved) |
86 | | * |
87 | | * @param[in] pred_strd |
88 | | * WORD32 Pred stride |
89 | | * |
90 | | * @param[in] pi2_dst |
91 | | * WORD16 pointer to the transform block |
92 | | * |
93 | | * @param[in] dst_strd (u or v, interleaved) |
94 | | * WORD32 Destination stride |
95 | | * |
96 | | * @returns |
97 | | * Hadamard SAD |
98 | | * |
99 | | * @remarks |
100 | | * Not updating the transform destination now. Only returning the SATD |
101 | | * |
102 | | ******************************************************************************* |
103 | | */ |
104 | | UWORD32 ihevce_chroma_HAD_4x4_8bit( |
105 | | UWORD8 *pu1_origin, |
106 | | WORD32 src_strd, |
107 | | UWORD8 *pu1_pred_buf, |
108 | | WORD32 pred_strd, |
109 | | WORD16 *pi2_dst, |
110 | | WORD32 dst_strd) |
111 | 12.7M | { |
112 | 12.7M | WORD32 k; |
113 | 12.7M | WORD16 diff[16], m[16], d[16]; |
114 | 12.7M | UWORD32 u4_sad = 0; |
115 | | |
116 | 12.7M | (void)pi2_dst; |
117 | 12.7M | (void)dst_strd; |
118 | 63.8M | for(k = 0; k < 16; k += 4) |
119 | 51.0M | { |
120 | | /* u or v, interleaved */ |
121 | 51.0M | diff[k + 0] = pu1_origin[2 * 0] - pu1_pred_buf[2 * 0]; |
122 | 51.0M | diff[k + 1] = pu1_origin[2 * 1] - pu1_pred_buf[2 * 1]; |
123 | 51.0M | diff[k + 2] = pu1_origin[2 * 2] - pu1_pred_buf[2 * 2]; |
124 | 51.0M | diff[k + 3] = pu1_origin[2 * 3] - pu1_pred_buf[2 * 3]; |
125 | | |
126 | 51.0M | pu1_pred_buf += pred_strd; |
127 | 51.0M | pu1_origin += src_strd; |
128 | 51.0M | } |
129 | | |
130 | | /*===== hadamard transform =====*/ |
131 | 12.7M | m[0] = diff[0] + diff[12]; |
132 | 12.7M | m[1] = diff[1] + diff[13]; |
133 | 12.7M | m[2] = diff[2] + diff[14]; |
134 | 12.7M | m[3] = diff[3] + diff[15]; |
135 | 12.7M | m[4] = diff[4] + diff[8]; |
136 | 12.7M | m[5] = diff[5] + diff[9]; |
137 | 12.7M | m[6] = diff[6] + diff[10]; |
138 | 12.7M | m[7] = diff[7] + diff[11]; |
139 | 12.7M | m[8] = diff[4] - diff[8]; |
140 | 12.7M | m[9] = diff[5] - diff[9]; |
141 | 12.7M | m[10] = diff[6] - diff[10]; |
142 | 12.7M | m[11] = diff[7] - diff[11]; |
143 | 12.7M | m[12] = diff[0] - diff[12]; |
144 | 12.7M | m[13] = diff[1] - diff[13]; |
145 | 12.7M | m[14] = diff[2] - diff[14]; |
146 | 12.7M | m[15] = diff[3] - diff[15]; |
147 | | |
148 | 12.7M | d[0] = m[0] + m[4]; |
149 | 12.7M | d[1] = m[1] + m[5]; |
150 | 12.7M | d[2] = m[2] + m[6]; |
151 | 12.7M | d[3] = m[3] + m[7]; |
152 | 12.7M | d[4] = m[8] + m[12]; |
153 | 12.7M | d[5] = m[9] + m[13]; |
154 | 12.7M | d[6] = m[10] + m[14]; |
155 | 12.7M | d[7] = m[11] + m[15]; |
156 | 12.7M | d[8] = m[0] - m[4]; |
157 | 12.7M | d[9] = m[1] - m[5]; |
158 | 12.7M | d[10] = m[2] - m[6]; |
159 | 12.7M | d[11] = m[3] - m[7]; |
160 | 12.7M | d[12] = m[12] - m[8]; |
161 | 12.7M | d[13] = m[13] - m[9]; |
162 | 12.7M | d[14] = m[14] - m[10]; |
163 | 12.7M | d[15] = m[15] - m[11]; |
164 | | |
165 | 12.7M | m[0] = d[0] + d[3]; |
166 | 12.7M | m[1] = d[1] + d[2]; |
167 | 12.7M | m[2] = d[1] - d[2]; |
168 | 12.7M | m[3] = d[0] - d[3]; |
169 | 12.7M | m[4] = d[4] + d[7]; |
170 | 12.7M | m[5] = d[5] + d[6]; |
171 | 12.7M | m[6] = d[5] - d[6]; |
172 | 12.7M | m[7] = d[4] - d[7]; |
173 | 12.7M | m[8] = d[8] + d[11]; |
174 | 12.7M | m[9] = d[9] + d[10]; |
175 | 12.7M | m[10] = d[9] - d[10]; |
176 | 12.7M | m[11] = d[8] - d[11]; |
177 | 12.7M | m[12] = d[12] + d[15]; |
178 | 12.7M | m[13] = d[13] + d[14]; |
179 | 12.7M | m[14] = d[13] - d[14]; |
180 | 12.7M | m[15] = d[12] - d[15]; |
181 | | |
182 | 12.7M | d[0] = m[0] + m[1]; |
183 | 12.7M | d[1] = m[0] - m[1]; |
184 | 12.7M | d[2] = m[2] + m[3]; |
185 | 12.7M | d[3] = m[3] - m[2]; |
186 | 12.7M | d[4] = m[4] + m[5]; |
187 | 12.7M | d[5] = m[4] - m[5]; |
188 | 12.7M | d[6] = m[6] + m[7]; |
189 | 12.7M | d[7] = m[7] - m[6]; |
190 | 12.7M | d[8] = m[8] + m[9]; |
191 | 12.7M | d[9] = m[8] - m[9]; |
192 | 12.7M | d[10] = m[10] + m[11]; |
193 | 12.7M | d[11] = m[11] - m[10]; |
194 | 12.7M | d[12] = m[12] + m[13]; |
195 | 12.7M | d[13] = m[12] - m[13]; |
196 | 12.7M | d[14] = m[14] + m[15]; |
197 | 12.7M | d[15] = m[15] - m[14]; |
198 | | |
199 | | /*===== sad =====*/ |
200 | 217M | for(k = 0; k < 16; ++k) |
201 | 204M | { |
202 | 204M | u4_sad += (d[k] > 0 ? d[k] : -d[k]); |
203 | 204M | } |
204 | 12.7M | u4_sad = ((u4_sad + 2) >> 2); |
205 | | |
206 | 12.7M | return u4_sad; |
207 | 12.7M | } |
208 | | |
209 | | /** |
210 | | ******************************************************************************* |
211 | | * |
212 | | * @brief |
213 | | * Chroma Hadamard Transform of 4x4 block (8-bit input) with DC suppressed |
214 | | * |
215 | | * @par Description: |
216 | | * |
217 | | * @param[in] pu1_origin |
218 | | * UWORD8 pointer to the source block (u or v, interleaved) |
219 | | * |
220 | | * @param[in] src_strd |
221 | | * WORD32 Source stride |
222 | | * |
223 | | * @param[in] pu1_pred_buf |
224 | | * UWORD8 pointer to the prediction block (u or v, interleaved) |
225 | | * |
226 | | * @param[in] pred_strd |
227 | | * WORD32 Pred stride |
228 | | * |
229 | | * @param[in] pi2_dst |
230 | | * WORD16 pointer to the transform block |
231 | | * |
232 | | * @param[in] dst_strd (u or v, interleaved) |
233 | | * WORD32 Destination stride |
234 | | * |
235 | | * @returns |
236 | | * Hadamard SAD |
237 | | * |
238 | | * @remarks |
239 | | * Not updating the transform destination now. Only returning the SATD |
240 | | * |
241 | | ******************************************************************************* |
242 | | */ |
243 | | UWORD32 ihevce_chroma_compute_AC_HAD_4x4_8bit( |
244 | | UWORD8 *pu1_origin, |
245 | | WORD32 src_strd, |
246 | | UWORD8 *pu1_pred_buf, |
247 | | WORD32 pred_strd, |
248 | | WORD16 *pi2_dst, |
249 | | WORD32 dst_strd) |
250 | 0 | { |
251 | 0 | WORD32 k; |
252 | 0 | WORD16 diff[16], m[16], d[16]; |
253 | 0 | UWORD32 u4_sad = 0; |
254 | |
|
255 | 0 | (void)pi2_dst; |
256 | 0 | (void)dst_strd; |
257 | 0 | for(k = 0; k < 16; k += 4) |
258 | 0 | { |
259 | | /* u or v, interleaved */ |
260 | 0 | diff[k + 0] = pu1_origin[2 * 0] - pu1_pred_buf[2 * 0]; |
261 | 0 | diff[k + 1] = pu1_origin[2 * 1] - pu1_pred_buf[2 * 1]; |
262 | 0 | diff[k + 2] = pu1_origin[2 * 2] - pu1_pred_buf[2 * 2]; |
263 | 0 | diff[k + 3] = pu1_origin[2 * 3] - pu1_pred_buf[2 * 3]; |
264 | |
|
265 | 0 | pu1_pred_buf += pred_strd; |
266 | 0 | pu1_origin += src_strd; |
267 | 0 | } |
268 | | |
269 | | /*===== hadamard transform =====*/ |
270 | 0 | m[0] = diff[0] + diff[12]; |
271 | 0 | m[1] = diff[1] + diff[13]; |
272 | 0 | m[2] = diff[2] + diff[14]; |
273 | 0 | m[3] = diff[3] + diff[15]; |
274 | 0 | m[4] = diff[4] + diff[8]; |
275 | 0 | m[5] = diff[5] + diff[9]; |
276 | 0 | m[6] = diff[6] + diff[10]; |
277 | 0 | m[7] = diff[7] + diff[11]; |
278 | 0 | m[8] = diff[4] - diff[8]; |
279 | 0 | m[9] = diff[5] - diff[9]; |
280 | 0 | m[10] = diff[6] - diff[10]; |
281 | 0 | m[11] = diff[7] - diff[11]; |
282 | 0 | m[12] = diff[0] - diff[12]; |
283 | 0 | m[13] = diff[1] - diff[13]; |
284 | 0 | m[14] = diff[2] - diff[14]; |
285 | 0 | m[15] = diff[3] - diff[15]; |
286 | |
|
287 | 0 | d[0] = m[0] + m[4]; |
288 | 0 | d[1] = m[1] + m[5]; |
289 | 0 | d[2] = m[2] + m[6]; |
290 | 0 | d[3] = m[3] + m[7]; |
291 | 0 | d[4] = m[8] + m[12]; |
292 | 0 | d[5] = m[9] + m[13]; |
293 | 0 | d[6] = m[10] + m[14]; |
294 | 0 | d[7] = m[11] + m[15]; |
295 | 0 | d[8] = m[0] - m[4]; |
296 | 0 | d[9] = m[1] - m[5]; |
297 | 0 | d[10] = m[2] - m[6]; |
298 | 0 | d[11] = m[3] - m[7]; |
299 | 0 | d[12] = m[12] - m[8]; |
300 | 0 | d[13] = m[13] - m[9]; |
301 | 0 | d[14] = m[14] - m[10]; |
302 | 0 | d[15] = m[15] - m[11]; |
303 | |
|
304 | 0 | m[0] = d[0] + d[3]; |
305 | 0 | m[1] = d[1] + d[2]; |
306 | 0 | m[2] = d[1] - d[2]; |
307 | 0 | m[3] = d[0] - d[3]; |
308 | 0 | m[4] = d[4] + d[7]; |
309 | 0 | m[5] = d[5] + d[6]; |
310 | 0 | m[6] = d[5] - d[6]; |
311 | 0 | m[7] = d[4] - d[7]; |
312 | 0 | m[8] = d[8] + d[11]; |
313 | 0 | m[9] = d[9] + d[10]; |
314 | 0 | m[10] = d[9] - d[10]; |
315 | 0 | m[11] = d[8] - d[11]; |
316 | 0 | m[12] = d[12] + d[15]; |
317 | 0 | m[13] = d[13] + d[14]; |
318 | 0 | m[14] = d[13] - d[14]; |
319 | 0 | m[15] = d[12] - d[15]; |
320 | |
|
321 | 0 | d[0] = m[0] + m[1]; |
322 | 0 | d[1] = m[0] - m[1]; |
323 | 0 | d[2] = m[2] + m[3]; |
324 | 0 | d[3] = m[3] - m[2]; |
325 | 0 | d[4] = m[4] + m[5]; |
326 | 0 | d[5] = m[4] - m[5]; |
327 | 0 | d[6] = m[6] + m[7]; |
328 | 0 | d[7] = m[7] - m[6]; |
329 | 0 | d[8] = m[8] + m[9]; |
330 | 0 | d[9] = m[8] - m[9]; |
331 | 0 | d[10] = m[10] + m[11]; |
332 | 0 | d[11] = m[11] - m[10]; |
333 | 0 | d[12] = m[12] + m[13]; |
334 | 0 | d[13] = m[12] - m[13]; |
335 | 0 | d[14] = m[14] + m[15]; |
336 | 0 | d[15] = m[15] - m[14]; |
337 | | |
338 | | /* DC masking */ |
339 | 0 | d[0] = 0; |
340 | | |
341 | | /*===== sad =====*/ |
342 | 0 | for(k = 0; k < 16; ++k) |
343 | 0 | { |
344 | 0 | u4_sad += (d[k] > 0 ? d[k] : -d[k]); |
345 | 0 | } |
346 | 0 | u4_sad = ((u4_sad + 2) >> 2); |
347 | |
|
348 | 0 | return u4_sad; |
349 | 0 | } |
350 | | |
351 | | /** |
352 | | ******************************************************************************* |
353 | | * |
354 | | * @brief |
355 | | * Chroma Hadamard Transform of 8x8 block (8-bit input) |
356 | | * |
357 | | * @par Description: |
358 | | * |
359 | | * @param[in] pu1_origin |
360 | | * UWORD8 pointer to the source block (u or v, interleaved) |
361 | | * |
362 | | * @param[in] src_strd |
363 | | * WORD32 Source stride |
364 | | * |
365 | | * @param[in] pu1_pred_buf |
366 | | * UWORD8 pointer to the prediction block (u or v, interleaved) |
367 | | * |
368 | | * @param[in] pred_strd |
369 | | * WORD32 Pred stride |
370 | | * |
371 | | * @param[in] pi2_dst |
372 | | * WORD16 pointer to the transform block |
373 | | * |
374 | | * @param[in] dst_strd (u or v, interleaved) |
375 | | * WORD32 Destination stride |
376 | | * |
377 | | * @returns |
378 | | * Hadamard SAD |
379 | | * |
380 | | * @remarks |
381 | | * Not updating the transform destination now. Only returning the SATD |
382 | | * |
383 | | ******************************************************************************* |
384 | | */ |
385 | | UWORD32 ihevce_chroma_HAD_8x8_8bit( |
386 | | UWORD8 *pu1_origin, |
387 | | WORD32 src_strd, |
388 | | UWORD8 *pu1_pred_buf, |
389 | | WORD32 pred_strd, |
390 | | WORD16 *pi2_dst, |
391 | | WORD32 dst_strd) |
392 | 27.6M | { |
393 | 27.6M | WORD32 k, i, j, jj; |
394 | 27.6M | UWORD32 u4_sad = 0; |
395 | 27.6M | WORD16 diff[64], m1[8][8], m2[8][8], m3[8][8]; |
396 | | |
397 | 27.6M | (void)pi2_dst; |
398 | 27.6M | (void)dst_strd; |
399 | 248M | for(k = 0; k < 64; k += 8) |
400 | 220M | { |
401 | | /* u or v, interleaved */ |
402 | 220M | diff[k + 0] = pu1_origin[2 * 0] - pu1_pred_buf[2 * 0]; |
403 | 220M | diff[k + 1] = pu1_origin[2 * 1] - pu1_pred_buf[2 * 1]; |
404 | 220M | diff[k + 2] = pu1_origin[2 * 2] - pu1_pred_buf[2 * 2]; |
405 | 220M | diff[k + 3] = pu1_origin[2 * 3] - pu1_pred_buf[2 * 3]; |
406 | 220M | diff[k + 4] = pu1_origin[2 * 4] - pu1_pred_buf[2 * 4]; |
407 | 220M | diff[k + 5] = pu1_origin[2 * 5] - pu1_pred_buf[2 * 5]; |
408 | 220M | diff[k + 6] = pu1_origin[2 * 6] - pu1_pred_buf[2 * 6]; |
409 | 220M | diff[k + 7] = pu1_origin[2 * 7] - pu1_pred_buf[2 * 7]; |
410 | | |
411 | 220M | pu1_pred_buf += pred_strd; |
412 | 220M | pu1_origin += src_strd; |
413 | 220M | } |
414 | | |
415 | | /*===== hadamard transform =====*/ |
416 | | // horizontal |
417 | 248M | for(j = 0; j < 8; j++) |
418 | 220M | { |
419 | 220M | jj = j << 3; |
420 | 220M | m2[j][0] = diff[jj] + diff[jj + 4]; |
421 | 220M | m2[j][1] = diff[jj + 1] + diff[jj + 5]; |
422 | 220M | m2[j][2] = diff[jj + 2] + diff[jj + 6]; |
423 | 220M | m2[j][3] = diff[jj + 3] + diff[jj + 7]; |
424 | 220M | m2[j][4] = diff[jj] - diff[jj + 4]; |
425 | 220M | m2[j][5] = diff[jj + 1] - diff[jj + 5]; |
426 | 220M | m2[j][6] = diff[jj + 2] - diff[jj + 6]; |
427 | 220M | m2[j][7] = diff[jj + 3] - diff[jj + 7]; |
428 | | |
429 | 220M | m1[j][0] = m2[j][0] + m2[j][2]; |
430 | 220M | m1[j][1] = m2[j][1] + m2[j][3]; |
431 | 220M | m1[j][2] = m2[j][0] - m2[j][2]; |
432 | 220M | m1[j][3] = m2[j][1] - m2[j][3]; |
433 | 220M | m1[j][4] = m2[j][4] + m2[j][6]; |
434 | 220M | m1[j][5] = m2[j][5] + m2[j][7]; |
435 | 220M | m1[j][6] = m2[j][4] - m2[j][6]; |
436 | 220M | m1[j][7] = m2[j][5] - m2[j][7]; |
437 | | |
438 | 220M | m2[j][0] = m1[j][0] + m1[j][1]; |
439 | 220M | m2[j][1] = m1[j][0] - m1[j][1]; |
440 | 220M | m2[j][2] = m1[j][2] + m1[j][3]; |
441 | 220M | m2[j][3] = m1[j][2] - m1[j][3]; |
442 | 220M | m2[j][4] = m1[j][4] + m1[j][5]; |
443 | 220M | m2[j][5] = m1[j][4] - m1[j][5]; |
444 | 220M | m2[j][6] = m1[j][6] + m1[j][7]; |
445 | 220M | m2[j][7] = m1[j][6] - m1[j][7]; |
446 | 220M | } |
447 | | |
448 | | // vertical |
449 | 248M | for(i = 0; i < 8; i++) |
450 | 220M | { |
451 | 220M | m3[0][i] = m2[0][i] + m2[4][i]; |
452 | 220M | m3[1][i] = m2[1][i] + m2[5][i]; |
453 | 220M | m3[2][i] = m2[2][i] + m2[6][i]; |
454 | 220M | m3[3][i] = m2[3][i] + m2[7][i]; |
455 | 220M | m3[4][i] = m2[0][i] - m2[4][i]; |
456 | 220M | m3[5][i] = m2[1][i] - m2[5][i]; |
457 | 220M | m3[6][i] = m2[2][i] - m2[6][i]; |
458 | 220M | m3[7][i] = m2[3][i] - m2[7][i]; |
459 | | |
460 | 220M | m1[0][i] = m3[0][i] + m3[2][i]; |
461 | 220M | m1[1][i] = m3[1][i] + m3[3][i]; |
462 | 220M | m1[2][i] = m3[0][i] - m3[2][i]; |
463 | 220M | m1[3][i] = m3[1][i] - m3[3][i]; |
464 | 220M | m1[4][i] = m3[4][i] + m3[6][i]; |
465 | 220M | m1[5][i] = m3[5][i] + m3[7][i]; |
466 | 220M | m1[6][i] = m3[4][i] - m3[6][i]; |
467 | 220M | m1[7][i] = m3[5][i] - m3[7][i]; |
468 | | |
469 | 220M | m2[0][i] = m1[0][i] + m1[1][i]; |
470 | 220M | m2[1][i] = m1[0][i] - m1[1][i]; |
471 | 220M | m2[2][i] = m1[2][i] + m1[3][i]; |
472 | 220M | m2[3][i] = m1[2][i] - m1[3][i]; |
473 | 220M | m2[4][i] = m1[4][i] + m1[5][i]; |
474 | 220M | m2[5][i] = m1[4][i] - m1[5][i]; |
475 | 220M | m2[6][i] = m1[6][i] + m1[7][i]; |
476 | 220M | m2[7][i] = m1[6][i] - m1[7][i]; |
477 | 220M | } |
478 | | |
479 | | /*===== sad =====*/ |
480 | 248M | for(i = 0; i < 8; i++) |
481 | 220M | { |
482 | 1.98G | for(j = 0; j < 8; j++) |
483 | 1.76G | { |
484 | 1.76G | u4_sad += (m2[i][j] > 0 ? m2[i][j] : -m2[i][j]); |
485 | 1.76G | } |
486 | 220M | } |
487 | 27.6M | u4_sad = ((u4_sad + 4) >> 3); |
488 | | |
489 | 27.6M | return u4_sad; |
490 | 27.6M | } |
491 | | |
492 | | /** |
493 | | ******************************************************************************* |
494 | | * |
495 | | * @brief |
496 | | * Chroma Hadamard Transform of 16x16 block (8-bit input) |
497 | | * |
498 | | * @par Description: |
499 | | * |
500 | | * @param[in] pu1_origin |
501 | | * UWORD8 pointer to the source block (u or v, interleaved) |
502 | | * |
503 | | * @param[in] src_strd |
504 | | * WORD32 Source stride |
505 | | * |
506 | | * @param[in] pu1_pred_buf |
507 | | * UWORD8 pointer to the prediction block (u or v, interleaved) |
508 | | * |
509 | | * @param[in] pred_strd |
510 | | * WORD32 Pred stride |
511 | | * |
512 | | * @param[in] pi2_dst |
513 | | * WORD16 pointer to the transform block |
514 | | * |
515 | | * @param[in] dst_strd (u or v, interleaved) |
516 | | * WORD32 Destination stride |
517 | | * |
518 | | * @returns |
519 | | * Hadamard SAD |
520 | | * |
521 | | * @remarks |
522 | | * Not updating the transform destination now. Only returning the SATD |
523 | | * |
524 | | ******************************************************************************* |
525 | | */ |
526 | | UWORD32 ihevce_chroma_HAD_16x16_8bit( |
527 | | UWORD8 *pu1_origin, |
528 | | WORD32 src_strd, |
529 | | UWORD8 *pu1_pred_buf, |
530 | | WORD32 pred_strd, |
531 | | WORD16 *pi2_dst, |
532 | | WORD32 dst_strd) |
533 | 4.25M | { |
534 | 4.25M | UWORD32 au4_sad[4], u4_result = 0; |
535 | 4.25M | WORD32 i; |
536 | | |
537 | 21.2M | for(i = 0; i < 4; i++) |
538 | 17.0M | { |
539 | 17.0M | au4_sad[i] = ihevce_chroma_HAD_8x8_8bit( |
540 | 17.0M | pu1_origin, src_strd, pu1_pred_buf, pred_strd, pi2_dst, dst_strd); |
541 | | |
542 | 17.0M | if(i == 0 || i == 2) |
543 | 8.51M | { |
544 | 8.51M | pu1_origin += 16; |
545 | 8.51M | pu1_pred_buf += 16; |
546 | 8.51M | } |
547 | | |
548 | 17.0M | if(i == 1) |
549 | 4.25M | { |
550 | 4.25M | pu1_origin += (8 * src_strd) - 16; |
551 | 4.25M | pu1_pred_buf += (8 * pred_strd) - 16; |
552 | 4.25M | } |
553 | | |
554 | 17.0M | u4_result += au4_sad[i]; |
555 | 17.0M | } |
556 | | |
557 | 4.25M | return u4_result; |
558 | 4.25M | } |