Coverage Report

Created: 2025-07-18 07:02

/src/libhevc/encoder/ihevce_chroma_had_satd.c
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2018 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
/*!
21
******************************************************************************
22
* \file ihevce_chroma_had_satd.c
23
*
24
* \brief
25
*    This file contains function definitions of chroma HAD SATD functions
26
*
27
* \date
28
*    15/07/2013
29
*
30
* \author
31
*    Ittiam
32
*
33
* List of Functions
34
*  ihevce_chroma_HAD_4x4_8bit()
35
*  ihevce_chroma_compute_AC_HAD_4x4_8bit()
36
*  ihevce_hbd_chroma_HAD_4x4()
37
*  ihevce_hbd_chroma_compute_AC_HAD_4x4()
38
*  ihevce_chroma_HAD_8x8_8bit()
39
*  ihevce_hbd_chroma_HAD_8x8()
40
*  ihevce_chroma_HAD_16x16_8bit()
41
*  ihevce_hbd_chroma_HAD_16x16()
42
*
43
******************************************************************************
44
*/
45
46
/*****************************************************************************/
47
/* File Includes                                                             */
48
/*****************************************************************************/
49
/* System include files */
50
#include <stdio.h>
51
#include <string.h>
52
#include <stdlib.h>
53
#include <assert.h>
54
#include <stdarg.h>
55
#include <math.h>
56
57
/* User include files */
58
#include "ihevc_typedefs.h"
59
#include "ihevc_debug.h"
60
#include "itt_video_api.h"
61
62
#include "ihevce_api.h"
63
#include "ihevce_defs.h"
64
#include "ihevce_had_satd.h"
65
66
/*****************************************************************************/
67
/* Function Definitions                                                      */
68
/*****************************************************************************/
69
70
/**
71
*******************************************************************************
72
*
73
* @brief
74
*  Chroma Hadamard Transform of 4x4 block (8-bit input)
75
*
76
* @par Description:
77
*
78
* @param[in] pu1_origin
79
*  UWORD8 pointer to the source block (u or v, interleaved)
80
*
81
* @param[in] src_strd
82
*  WORD32 Source stride
83
*
84
* @param[in] pu1_pred_buf
85
*  UWORD8 pointer to the prediction block (u or v, interleaved)
86
*
87
* @param[in] pred_strd
88
*  WORD32 Pred stride
89
*
90
* @param[in] pi2_dst
91
*  WORD16 pointer to the transform block
92
*
93
* @param[in] dst_strd (u or v, interleaved)
94
*  WORD32 Destination stride
95
*
96
* @returns
97
*  Hadamard SAD
98
*
99
* @remarks
100
*  Not updating the transform destination now. Only returning the SATD
101
*
102
*******************************************************************************
103
*/
104
UWORD32 ihevce_chroma_HAD_4x4_8bit(
105
    UWORD8 *pu1_origin,
106
    WORD32 src_strd,
107
    UWORD8 *pu1_pred_buf,
108
    WORD32 pred_strd,
109
    WORD16 *pi2_dst,
110
    WORD32 dst_strd)
111
12.7M
{
112
12.7M
    WORD32 k;
113
12.7M
    WORD16 diff[16], m[16], d[16];
114
12.7M
    UWORD32 u4_sad = 0;
115
116
12.7M
    (void)pi2_dst;
117
12.7M
    (void)dst_strd;
118
63.8M
    for(k = 0; k < 16; k += 4)
119
51.0M
    {
120
        /* u or v, interleaved */
121
51.0M
        diff[k + 0] = pu1_origin[2 * 0] - pu1_pred_buf[2 * 0];
122
51.0M
        diff[k + 1] = pu1_origin[2 * 1] - pu1_pred_buf[2 * 1];
123
51.0M
        diff[k + 2] = pu1_origin[2 * 2] - pu1_pred_buf[2 * 2];
124
51.0M
        diff[k + 3] = pu1_origin[2 * 3] - pu1_pred_buf[2 * 3];
125
126
51.0M
        pu1_pred_buf += pred_strd;
127
51.0M
        pu1_origin += src_strd;
128
51.0M
    }
129
130
    /*===== hadamard transform =====*/
131
12.7M
    m[0] = diff[0] + diff[12];
132
12.7M
    m[1] = diff[1] + diff[13];
133
12.7M
    m[2] = diff[2] + diff[14];
134
12.7M
    m[3] = diff[3] + diff[15];
135
12.7M
    m[4] = diff[4] + diff[8];
136
12.7M
    m[5] = diff[5] + diff[9];
137
12.7M
    m[6] = diff[6] + diff[10];
138
12.7M
    m[7] = diff[7] + diff[11];
139
12.7M
    m[8] = diff[4] - diff[8];
140
12.7M
    m[9] = diff[5] - diff[9];
141
12.7M
    m[10] = diff[6] - diff[10];
142
12.7M
    m[11] = diff[7] - diff[11];
143
12.7M
    m[12] = diff[0] - diff[12];
144
12.7M
    m[13] = diff[1] - diff[13];
145
12.7M
    m[14] = diff[2] - diff[14];
146
12.7M
    m[15] = diff[3] - diff[15];
147
148
12.7M
    d[0] = m[0] + m[4];
149
12.7M
    d[1] = m[1] + m[5];
150
12.7M
    d[2] = m[2] + m[6];
151
12.7M
    d[3] = m[3] + m[7];
152
12.7M
    d[4] = m[8] + m[12];
153
12.7M
    d[5] = m[9] + m[13];
154
12.7M
    d[6] = m[10] + m[14];
155
12.7M
    d[7] = m[11] + m[15];
156
12.7M
    d[8] = m[0] - m[4];
157
12.7M
    d[9] = m[1] - m[5];
158
12.7M
    d[10] = m[2] - m[6];
159
12.7M
    d[11] = m[3] - m[7];
160
12.7M
    d[12] = m[12] - m[8];
161
12.7M
    d[13] = m[13] - m[9];
162
12.7M
    d[14] = m[14] - m[10];
163
12.7M
    d[15] = m[15] - m[11];
164
165
12.7M
    m[0] = d[0] + d[3];
166
12.7M
    m[1] = d[1] + d[2];
167
12.7M
    m[2] = d[1] - d[2];
168
12.7M
    m[3] = d[0] - d[3];
169
12.7M
    m[4] = d[4] + d[7];
170
12.7M
    m[5] = d[5] + d[6];
171
12.7M
    m[6] = d[5] - d[6];
172
12.7M
    m[7] = d[4] - d[7];
173
12.7M
    m[8] = d[8] + d[11];
174
12.7M
    m[9] = d[9] + d[10];
175
12.7M
    m[10] = d[9] - d[10];
176
12.7M
    m[11] = d[8] - d[11];
177
12.7M
    m[12] = d[12] + d[15];
178
12.7M
    m[13] = d[13] + d[14];
179
12.7M
    m[14] = d[13] - d[14];
180
12.7M
    m[15] = d[12] - d[15];
181
182
12.7M
    d[0] = m[0] + m[1];
183
12.7M
    d[1] = m[0] - m[1];
184
12.7M
    d[2] = m[2] + m[3];
185
12.7M
    d[3] = m[3] - m[2];
186
12.7M
    d[4] = m[4] + m[5];
187
12.7M
    d[5] = m[4] - m[5];
188
12.7M
    d[6] = m[6] + m[7];
189
12.7M
    d[7] = m[7] - m[6];
190
12.7M
    d[8] = m[8] + m[9];
191
12.7M
    d[9] = m[8] - m[9];
192
12.7M
    d[10] = m[10] + m[11];
193
12.7M
    d[11] = m[11] - m[10];
194
12.7M
    d[12] = m[12] + m[13];
195
12.7M
    d[13] = m[12] - m[13];
196
12.7M
    d[14] = m[14] + m[15];
197
12.7M
    d[15] = m[15] - m[14];
198
199
    /*===== sad =====*/
200
217M
    for(k = 0; k < 16; ++k)
201
204M
    {
202
204M
        u4_sad += (d[k] > 0 ? d[k] : -d[k]);
203
204M
    }
204
12.7M
    u4_sad = ((u4_sad + 2) >> 2);
205
206
12.7M
    return u4_sad;
207
12.7M
}
208
209
/**
210
*******************************************************************************
211
*
212
* @brief
213
*  Chroma Hadamard Transform of 4x4 block (8-bit input) with DC suppressed
214
*
215
* @par Description:
216
*
217
* @param[in] pu1_origin
218
*  UWORD8 pointer to the source block (u or v, interleaved)
219
*
220
* @param[in] src_strd
221
*  WORD32 Source stride
222
*
223
* @param[in] pu1_pred_buf
224
*  UWORD8 pointer to the prediction block (u or v, interleaved)
225
*
226
* @param[in] pred_strd
227
*  WORD32 Pred stride
228
*
229
* @param[in] pi2_dst
230
*  WORD16 pointer to the transform block
231
*
232
* @param[in] dst_strd (u or v, interleaved)
233
*  WORD32 Destination stride
234
*
235
* @returns
236
*  Hadamard SAD
237
*
238
* @remarks
239
*  Not updating the transform destination now. Only returning the SATD
240
*
241
*******************************************************************************
242
*/
243
UWORD32 ihevce_chroma_compute_AC_HAD_4x4_8bit(
244
    UWORD8 *pu1_origin,
245
    WORD32 src_strd,
246
    UWORD8 *pu1_pred_buf,
247
    WORD32 pred_strd,
248
    WORD16 *pi2_dst,
249
    WORD32 dst_strd)
250
0
{
251
0
    WORD32 k;
252
0
    WORD16 diff[16], m[16], d[16];
253
0
    UWORD32 u4_sad = 0;
254
255
0
    (void)pi2_dst;
256
0
    (void)dst_strd;
257
0
    for(k = 0; k < 16; k += 4)
258
0
    {
259
        /* u or v, interleaved */
260
0
        diff[k + 0] = pu1_origin[2 * 0] - pu1_pred_buf[2 * 0];
261
0
        diff[k + 1] = pu1_origin[2 * 1] - pu1_pred_buf[2 * 1];
262
0
        diff[k + 2] = pu1_origin[2 * 2] - pu1_pred_buf[2 * 2];
263
0
        diff[k + 3] = pu1_origin[2 * 3] - pu1_pred_buf[2 * 3];
264
265
0
        pu1_pred_buf += pred_strd;
266
0
        pu1_origin += src_strd;
267
0
    }
268
269
    /*===== hadamard transform =====*/
270
0
    m[0] = diff[0] + diff[12];
271
0
    m[1] = diff[1] + diff[13];
272
0
    m[2] = diff[2] + diff[14];
273
0
    m[3] = diff[3] + diff[15];
274
0
    m[4] = diff[4] + diff[8];
275
0
    m[5] = diff[5] + diff[9];
276
0
    m[6] = diff[6] + diff[10];
277
0
    m[7] = diff[7] + diff[11];
278
0
    m[8] = diff[4] - diff[8];
279
0
    m[9] = diff[5] - diff[9];
280
0
    m[10] = diff[6] - diff[10];
281
0
    m[11] = diff[7] - diff[11];
282
0
    m[12] = diff[0] - diff[12];
283
0
    m[13] = diff[1] - diff[13];
284
0
    m[14] = diff[2] - diff[14];
285
0
    m[15] = diff[3] - diff[15];
286
287
0
    d[0] = m[0] + m[4];
288
0
    d[1] = m[1] + m[5];
289
0
    d[2] = m[2] + m[6];
290
0
    d[3] = m[3] + m[7];
291
0
    d[4] = m[8] + m[12];
292
0
    d[5] = m[9] + m[13];
293
0
    d[6] = m[10] + m[14];
294
0
    d[7] = m[11] + m[15];
295
0
    d[8] = m[0] - m[4];
296
0
    d[9] = m[1] - m[5];
297
0
    d[10] = m[2] - m[6];
298
0
    d[11] = m[3] - m[7];
299
0
    d[12] = m[12] - m[8];
300
0
    d[13] = m[13] - m[9];
301
0
    d[14] = m[14] - m[10];
302
0
    d[15] = m[15] - m[11];
303
304
0
    m[0] = d[0] + d[3];
305
0
    m[1] = d[1] + d[2];
306
0
    m[2] = d[1] - d[2];
307
0
    m[3] = d[0] - d[3];
308
0
    m[4] = d[4] + d[7];
309
0
    m[5] = d[5] + d[6];
310
0
    m[6] = d[5] - d[6];
311
0
    m[7] = d[4] - d[7];
312
0
    m[8] = d[8] + d[11];
313
0
    m[9] = d[9] + d[10];
314
0
    m[10] = d[9] - d[10];
315
0
    m[11] = d[8] - d[11];
316
0
    m[12] = d[12] + d[15];
317
0
    m[13] = d[13] + d[14];
318
0
    m[14] = d[13] - d[14];
319
0
    m[15] = d[12] - d[15];
320
321
0
    d[0] = m[0] + m[1];
322
0
    d[1] = m[0] - m[1];
323
0
    d[2] = m[2] + m[3];
324
0
    d[3] = m[3] - m[2];
325
0
    d[4] = m[4] + m[5];
326
0
    d[5] = m[4] - m[5];
327
0
    d[6] = m[6] + m[7];
328
0
    d[7] = m[7] - m[6];
329
0
    d[8] = m[8] + m[9];
330
0
    d[9] = m[8] - m[9];
331
0
    d[10] = m[10] + m[11];
332
0
    d[11] = m[11] - m[10];
333
0
    d[12] = m[12] + m[13];
334
0
    d[13] = m[12] - m[13];
335
0
    d[14] = m[14] + m[15];
336
0
    d[15] = m[15] - m[14];
337
338
    /* DC masking */
339
0
    d[0] = 0;
340
341
    /*===== sad =====*/
342
0
    for(k = 0; k < 16; ++k)
343
0
    {
344
0
        u4_sad += (d[k] > 0 ? d[k] : -d[k]);
345
0
    }
346
0
    u4_sad = ((u4_sad + 2) >> 2);
347
348
0
    return u4_sad;
349
0
}
350
351
/**
352
*******************************************************************************
353
*
354
* @brief
355
*  Chroma Hadamard Transform of 8x8 block (8-bit input)
356
*
357
* @par Description:
358
*
359
* @param[in] pu1_origin
360
*  UWORD8 pointer to the source block (u or v, interleaved)
361
*
362
* @param[in] src_strd
363
*  WORD32 Source stride
364
*
365
* @param[in] pu1_pred_buf
366
*  UWORD8 pointer to the prediction block (u or v, interleaved)
367
*
368
* @param[in] pred_strd
369
*  WORD32 Pred stride
370
*
371
* @param[in] pi2_dst
372
*  WORD16 pointer to the transform block
373
*
374
* @param[in] dst_strd (u or v, interleaved)
375
*  WORD32 Destination stride
376
*
377
* @returns
378
*  Hadamard SAD
379
*
380
* @remarks
381
*  Not updating the transform destination now. Only returning the SATD
382
*
383
*******************************************************************************
384
*/
385
UWORD32 ihevce_chroma_HAD_8x8_8bit(
386
    UWORD8 *pu1_origin,
387
    WORD32 src_strd,
388
    UWORD8 *pu1_pred_buf,
389
    WORD32 pred_strd,
390
    WORD16 *pi2_dst,
391
    WORD32 dst_strd)
392
27.6M
{
393
27.6M
    WORD32 k, i, j, jj;
394
27.6M
    UWORD32 u4_sad = 0;
395
27.6M
    WORD16 diff[64], m1[8][8], m2[8][8], m3[8][8];
396
397
27.6M
    (void)pi2_dst;
398
27.6M
    (void)dst_strd;
399
248M
    for(k = 0; k < 64; k += 8)
400
220M
    {
401
        /* u or v, interleaved */
402
220M
        diff[k + 0] = pu1_origin[2 * 0] - pu1_pred_buf[2 * 0];
403
220M
        diff[k + 1] = pu1_origin[2 * 1] - pu1_pred_buf[2 * 1];
404
220M
        diff[k + 2] = pu1_origin[2 * 2] - pu1_pred_buf[2 * 2];
405
220M
        diff[k + 3] = pu1_origin[2 * 3] - pu1_pred_buf[2 * 3];
406
220M
        diff[k + 4] = pu1_origin[2 * 4] - pu1_pred_buf[2 * 4];
407
220M
        diff[k + 5] = pu1_origin[2 * 5] - pu1_pred_buf[2 * 5];
408
220M
        diff[k + 6] = pu1_origin[2 * 6] - pu1_pred_buf[2 * 6];
409
220M
        diff[k + 7] = pu1_origin[2 * 7] - pu1_pred_buf[2 * 7];
410
411
220M
        pu1_pred_buf += pred_strd;
412
220M
        pu1_origin += src_strd;
413
220M
    }
414
415
    /*===== hadamard transform =====*/
416
    // horizontal
417
248M
    for(j = 0; j < 8; j++)
418
220M
    {
419
220M
        jj = j << 3;
420
220M
        m2[j][0] = diff[jj] + diff[jj + 4];
421
220M
        m2[j][1] = diff[jj + 1] + diff[jj + 5];
422
220M
        m2[j][2] = diff[jj + 2] + diff[jj + 6];
423
220M
        m2[j][3] = diff[jj + 3] + diff[jj + 7];
424
220M
        m2[j][4] = diff[jj] - diff[jj + 4];
425
220M
        m2[j][5] = diff[jj + 1] - diff[jj + 5];
426
220M
        m2[j][6] = diff[jj + 2] - diff[jj + 6];
427
220M
        m2[j][7] = diff[jj + 3] - diff[jj + 7];
428
429
220M
        m1[j][0] = m2[j][0] + m2[j][2];
430
220M
        m1[j][1] = m2[j][1] + m2[j][3];
431
220M
        m1[j][2] = m2[j][0] - m2[j][2];
432
220M
        m1[j][3] = m2[j][1] - m2[j][3];
433
220M
        m1[j][4] = m2[j][4] + m2[j][6];
434
220M
        m1[j][5] = m2[j][5] + m2[j][7];
435
220M
        m1[j][6] = m2[j][4] - m2[j][6];
436
220M
        m1[j][7] = m2[j][5] - m2[j][7];
437
438
220M
        m2[j][0] = m1[j][0] + m1[j][1];
439
220M
        m2[j][1] = m1[j][0] - m1[j][1];
440
220M
        m2[j][2] = m1[j][2] + m1[j][3];
441
220M
        m2[j][3] = m1[j][2] - m1[j][3];
442
220M
        m2[j][4] = m1[j][4] + m1[j][5];
443
220M
        m2[j][5] = m1[j][4] - m1[j][5];
444
220M
        m2[j][6] = m1[j][6] + m1[j][7];
445
220M
        m2[j][7] = m1[j][6] - m1[j][7];
446
220M
    }
447
448
    // vertical
449
248M
    for(i = 0; i < 8; i++)
450
220M
    {
451
220M
        m3[0][i] = m2[0][i] + m2[4][i];
452
220M
        m3[1][i] = m2[1][i] + m2[5][i];
453
220M
        m3[2][i] = m2[2][i] + m2[6][i];
454
220M
        m3[3][i] = m2[3][i] + m2[7][i];
455
220M
        m3[4][i] = m2[0][i] - m2[4][i];
456
220M
        m3[5][i] = m2[1][i] - m2[5][i];
457
220M
        m3[6][i] = m2[2][i] - m2[6][i];
458
220M
        m3[7][i] = m2[3][i] - m2[7][i];
459
460
220M
        m1[0][i] = m3[0][i] + m3[2][i];
461
220M
        m1[1][i] = m3[1][i] + m3[3][i];
462
220M
        m1[2][i] = m3[0][i] - m3[2][i];
463
220M
        m1[3][i] = m3[1][i] - m3[3][i];
464
220M
        m1[4][i] = m3[4][i] + m3[6][i];
465
220M
        m1[5][i] = m3[5][i] + m3[7][i];
466
220M
        m1[6][i] = m3[4][i] - m3[6][i];
467
220M
        m1[7][i] = m3[5][i] - m3[7][i];
468
469
220M
        m2[0][i] = m1[0][i] + m1[1][i];
470
220M
        m2[1][i] = m1[0][i] - m1[1][i];
471
220M
        m2[2][i] = m1[2][i] + m1[3][i];
472
220M
        m2[3][i] = m1[2][i] - m1[3][i];
473
220M
        m2[4][i] = m1[4][i] + m1[5][i];
474
220M
        m2[5][i] = m1[4][i] - m1[5][i];
475
220M
        m2[6][i] = m1[6][i] + m1[7][i];
476
220M
        m2[7][i] = m1[6][i] - m1[7][i];
477
220M
    }
478
479
    /*===== sad =====*/
480
248M
    for(i = 0; i < 8; i++)
481
220M
    {
482
1.98G
        for(j = 0; j < 8; j++)
483
1.76G
        {
484
1.76G
            u4_sad += (m2[i][j] > 0 ? m2[i][j] : -m2[i][j]);
485
1.76G
        }
486
220M
    }
487
27.6M
    u4_sad = ((u4_sad + 4) >> 3);
488
489
27.6M
    return u4_sad;
490
27.6M
}
491
492
/**
493
*******************************************************************************
494
*
495
* @brief
496
*  Chroma Hadamard Transform of 16x16 block (8-bit input)
497
*
498
* @par Description:
499
*
500
* @param[in] pu1_origin
501
*  UWORD8 pointer to the source block (u or v, interleaved)
502
*
503
* @param[in] src_strd
504
*  WORD32 Source stride
505
*
506
* @param[in] pu1_pred_buf
507
*  UWORD8 pointer to the prediction block (u or v, interleaved)
508
*
509
* @param[in] pred_strd
510
*  WORD32 Pred stride
511
*
512
* @param[in] pi2_dst
513
*  WORD16 pointer to the transform block
514
*
515
* @param[in] dst_strd (u or v, interleaved)
516
*  WORD32 Destination stride
517
*
518
* @returns
519
*  Hadamard SAD
520
*
521
* @remarks
522
*  Not updating the transform destination now. Only returning the SATD
523
*
524
*******************************************************************************
525
*/
526
UWORD32 ihevce_chroma_HAD_16x16_8bit(
527
    UWORD8 *pu1_origin,
528
    WORD32 src_strd,
529
    UWORD8 *pu1_pred_buf,
530
    WORD32 pred_strd,
531
    WORD16 *pi2_dst,
532
    WORD32 dst_strd)
533
4.25M
{
534
4.25M
    UWORD32 au4_sad[4], u4_result = 0;
535
4.25M
    WORD32 i;
536
537
21.2M
    for(i = 0; i < 4; i++)
538
17.0M
    {
539
17.0M
        au4_sad[i] = ihevce_chroma_HAD_8x8_8bit(
540
17.0M
            pu1_origin, src_strd, pu1_pred_buf, pred_strd, pi2_dst, dst_strd);
541
542
17.0M
        if(i == 0 || i == 2)
543
8.51M
        {
544
8.51M
            pu1_origin += 16;
545
8.51M
            pu1_pred_buf += 16;
546
8.51M
        }
547
548
17.0M
        if(i == 1)
549
4.25M
        {
550
4.25M
            pu1_origin += (8 * src_strd) - 16;
551
4.25M
            pu1_pred_buf += (8 * pred_strd) - 16;
552
4.25M
        }
553
554
17.0M
        u4_result += au4_sad[i];
555
17.0M
    }
556
557
4.25M
    return u4_result;
558
4.25M
}