Coverage Report

Created: 2025-07-09 06:41

/src/libavc/encoder/ime_distortion_metrics.c
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2015 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
21
/**
22
******************************************************************************
23
* @file ime_distortion_metrics.c
24
*
25
* @brief
26
*  This file contains definitions of routines that compute distortion
27
*  between two macro/sub blocks of identical dimensions
28
*
29
* @author
30
*  Ittiam
31
*
32
* @par List of Functions:
33
*  - ime_sub_pel_compute_sad_16x16()
34
*  - ime_calculate_sad4_prog()
35
*  - ime_calculate_sad3_prog()
36
*  - ime_calculate_sad2_prog()
37
*  - ime_compute_sad_16x16()
38
*  - ime_compute_sad_16x16_fast()
39
*  - ime_compute_sad_16x16_ea8()
40
*  - ime_compute_sad_8x8()
41
*  - ime_compute_sad_4x4()
42
*  - ime_compute_sad_16x8()
43
*  - ime_compute_satqd_16x16_lumainter()
44
*  - ime_compute_satqd_8x16_chroma()
45
*  - ime_compute_satqd_16x16_lumaintra()
46
*
47
* @remarks
48
*  None
49
*
50
*******************************************************************************
51
*/
52
53
/*****************************************************************************/
54
/* File Includes                                                             */
55
/*****************************************************************************/
56
57
/* System include files */
58
#include <stdio.h>
59
#include <stdlib.h>
60
#include <string.h>
61
62
/* User include files */
63
#include "ime_typedefs.h"
64
#include "ime_defs.h"
65
#include "ime_macros.h"
66
#include "ime_statistics.h"
67
#include "ime_platform_macros.h"
68
#include "ime_distortion_metrics.h"
69
70
71
/*****************************************************************************/
72
/* Function Definitions                                                      */
73
/*****************************************************************************/
74
75
/**
76
******************************************************************************
77
*
78
* @brief computes distortion (SAD) at all subpel points about the src location
79
*
80
* @par Description
81
*   This functions computes SAD at all points at a subpel distance from the
82
*   current source location.
83
*
84
* @param[in] pu1_src
85
*  UWORD8 pointer to the source
86
*
87
* @param[out] pu1_ref_half_x
88
*  UWORD8 pointer to half pel buffer
89
*
90
* @param[out] pu1_ref_half_y
91
*  UWORD8 pointer to half pel buffer
92
*
93
* @param[out] pu1_ref_half_xy
94
*  UWORD8 pointer to half pel buffer
95
*
96
* @param[in] src_strd
97
*  integer source stride
98
*
99
* @param[in] ref_strd
100
*  integer ref stride
101
*
102
* @param[out] pi4_sad
103
*  integer evaluated sad
104
*  pi4_sad[0] - half x
105
*  pi4_sad[1] - half x - 1
106
*  pi4_sad[2] - half y
107
*  pi4_sad[3] - half y - 1
108
*  pi4_sad[4] - half xy
109
*  pi4_sad[5] - half xy - 1
110
*  pi4_sad[6] - half xy - strd
111
*  pi4_sad[7] - half xy - 1 - strd
112
*
113
* @remarks
114
*
115
******************************************************************************
116
*/
117
void ime_sub_pel_compute_sad_16x16(UWORD8 *pu1_src,
118
                                   UWORD8 *pu1_ref_half_x,
119
                                   UWORD8 *pu1_ref_half_y,
120
                                   UWORD8 *pu1_ref_half_xy,
121
                                   WORD32 src_strd,
122
                                   WORD32 ref_strd,
123
                                   WORD32 *pi4_sad)
124
326k
{
125
326k
    UWORD8 *pu1_ref_half_x_left = pu1_ref_half_x - 1;
126
326k
    UWORD8 *pu1_ref_half_y_top = pu1_ref_half_y - ref_strd;
127
326k
    UWORD8 *pu1_ref_half_xy_left = pu1_ref_half_xy - 1;
128
326k
    UWORD8 *pu1_ref_half_xy_top = pu1_ref_half_xy - ref_strd;
129
326k
    UWORD8 *pu1_ref_half_xy_top_left = pu1_ref_half_xy - ref_strd - 1;
130
131
326k
    WORD32 row, col;
132
133
326k
    memset(pi4_sad, 0, 8 * sizeof(WORD32));
134
135
5.52M
    for(row = 0; row < MB_SIZE; row++)
136
5.20M
    {
137
86.5M
        for(col = 0; col < MB_SIZE; col++)
138
81.3M
        {
139
81.3M
            WORD32 src;
140
81.3M
            WORD32 diff;
141
142
81.3M
            src = pu1_src[col];
143
144
81.3M
            diff = src - pu1_ref_half_x[col];
145
81.3M
            pi4_sad[0] += ABS(diff);
146
147
81.3M
            diff = src - pu1_ref_half_x_left[col];
148
81.3M
            pi4_sad[1] += ABS(diff);
149
150
81.3M
            diff = src - pu1_ref_half_y[col];
151
81.3M
            pi4_sad[2] += ABS(diff);
152
153
81.3M
            diff = src - pu1_ref_half_y_top[col];
154
81.3M
            pi4_sad[3] += ABS(diff);
155
156
81.3M
            diff = src - pu1_ref_half_xy[col];
157
81.3M
            pi4_sad[4] += ABS(diff);
158
159
81.3M
            diff = src - pu1_ref_half_xy_left[col];
160
81.3M
            pi4_sad[5] += ABS(diff);
161
162
81.3M
            diff = src - pu1_ref_half_xy_top[col];
163
81.3M
            pi4_sad[6] += ABS(diff);
164
165
81.3M
            diff = src - pu1_ref_half_xy_top_left[col];
166
81.3M
            pi4_sad[7] += ABS(diff);
167
81.3M
        }
168
169
5.20M
        pu1_src += src_strd;
170
171
5.20M
        pu1_ref_half_x += ref_strd;
172
5.20M
        pu1_ref_half_x_left += ref_strd;
173
174
5.20M
        pu1_ref_half_y += ref_strd;
175
5.20M
        pu1_ref_half_y_top += ref_strd;
176
177
5.20M
        pu1_ref_half_xy += ref_strd;
178
5.20M
        pu1_ref_half_xy_left += ref_strd;
179
5.20M
        pu1_ref_half_xy_top += ref_strd;
180
5.20M
        pu1_ref_half_xy_top_left += ref_strd;
181
5.20M
    }
182
326k
}
183
184
/**
185
*******************************************************************************
186
*
187
* @brief compute sad
188
*
189
* @par Description: This function computes the sad at vertices of diamond grid
190
* centered at reference pointer and at unit distance from it.
191
*
192
* @param[in] pu1_ref
193
*  UWORD8 pointer to the reference
194
*
195
* @param[out] pu1_src
196
*  UWORD8 pointer to the source
197
*
198
* @param[in] ref_strd
199
*  integer reference stride
200
*
201
* @param[in] src_strd
202
*  integer source stride
203
*
204
* @param[out] pi4_sad
205
*  pointer to integer array evaluated sad
206
*
207
* @returns  sad at all evaluated vertexes
208
*
209
* @remarks  none
210
*
211
*******************************************************************************
212
*/
213
void ime_calculate_sad4_prog(UWORD8 *pu1_ref,
214
                             UWORD8 *pu1_src,
215
                             WORD32 ref_strd,
216
                             WORD32 src_strd,
217
                             WORD32 *pi4_sad)
218
979k
{
219
220
    /* reference ptrs at unit 1 distance in diamond pattern centered at pu1_ref */
221
979k
    UWORD8 *left_ptr    = pu1_ref - 1;
222
979k
    UWORD8 *right_ptr   = pu1_ref + 1;
223
979k
    UWORD8 *top_ptr     = pu1_ref - ref_strd;
224
979k
    UWORD8 *bot_ptr     = pu1_ref + ref_strd;
225
226
    /* temp var */
227
979k
    WORD32 count2, count3;
228
979k
    UWORD32 u4_ref_buf_offset = ref_strd - MB_SIZE;
229
979k
    UWORD32 u4_cur_buf_offset = src_strd - MB_SIZE;
230
231
979k
    memset(pi4_sad, 0, 4 * sizeof(WORD32));
232
233
16.5M
    for(count2 = MB_SIZE; count2 > 0; count2--)
234
15.5M
    {
235
256M
        for(count3 = MB_SIZE; count3 > 0 ; count3--)
236
241M
        {
237
241M
            WORD32 src;
238
241M
            WORD32 diff;
239
240
241M
            src = *pu1_src++;
241
242
241M
            diff = src - *left_ptr++;
243
241M
            pi4_sad[0] += ABS(diff);
244
245
241M
            diff = src - *right_ptr++;
246
241M
            pi4_sad[1] += ABS(diff);
247
248
241M
            diff = src - *top_ptr++;
249
241M
            pi4_sad[2] += ABS(diff);
250
251
241M
            diff = src - *bot_ptr++;
252
241M
            pi4_sad[3]  += ABS(diff);
253
241M
        }
254
255
15.5M
        bot_ptr    += u4_ref_buf_offset;
256
15.5M
        left_ptr   += u4_ref_buf_offset;
257
15.5M
        right_ptr  += u4_ref_buf_offset;
258
15.5M
        top_ptr    += u4_ref_buf_offset;
259
260
15.5M
        pu1_src += u4_cur_buf_offset;
261
15.5M
    }
262
263
979k
}
264
265
/**
266
*******************************************************************************
267
*
268
* @brief compute sad
269
*
270
* @par Description: This function computes the sad at vertices of diamond grid
271
* centered at reference pointer and at unit distance from it.
272
*
273
* @param[in] pu1_ref1, pu1_ref2, pu1_ref3
274
*  UWORD8 pointer to the reference
275
*
276
* @param[out] pu1_src
277
*  UWORD8 pointer to the source
278
*
279
* @param[in] ref_strd
280
*  integer reference stride
281
*
282
* @param[in] src_strd
283
*  integer source stride
284
*
285
* @param[out] pi4_sad
286
*  pointer to integer array evaluated sad
287
*
288
* @returns  sad at all evaluated vertexes
289
*
290
* @remarks  none
291
*
292
*******************************************************************************
293
*/
294
void ime_calculate_sad3_prog(UWORD8 *pu1_ref1,
295
                             UWORD8 *pu1_ref2,
296
                             UWORD8 *pu1_ref3,
297
                             UWORD8 *pu1_src,
298
                             WORD32 ref_strd,
299
                             WORD32 src_strd,
300
                             WORD32 *pi4_sad)
301
0
{
302
    /* temp var */
303
0
    WORD32 i;
304
0
    UWORD32 u4_ref_buf_offset = ref_strd - MB_SIZE;
305
0
    UWORD32 u4_cur_buf_offset = src_strd - MB_SIZE;
306
307
0
    for(i = 16; i > 0; i--)
308
0
    {
309
0
        USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
310
0
        USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
311
0
        USADA8(pu1_src, pu1_ref3, pi4_sad[2]);
312
0
        pu1_src += 4;
313
0
        pu1_ref1 += 4;
314
0
        pu1_ref2 += 4;
315
0
        pu1_ref3 += 4;
316
317
0
        USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
318
0
        USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
319
0
        USADA8(pu1_src, pu1_ref3, pi4_sad[2]);
320
0
        pu1_src += 4;
321
0
        pu1_ref1 += 4;
322
0
        pu1_ref2 += 4;
323
0
        pu1_ref3 += 4;
324
325
0
        USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
326
0
        USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
327
0
        USADA8(pu1_src, pu1_ref3, pi4_sad[2]);
328
0
        pu1_src += 4;
329
0
        pu1_ref1 += 4;
330
0
        pu1_ref2 += 4;
331
0
        pu1_ref3 += 4;
332
333
0
        USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
334
0
        USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
335
0
        USADA8(pu1_src, pu1_ref3, pi4_sad[2]);
336
0
        pu1_src += 4;
337
0
        pu1_ref1 += 4;
338
0
        pu1_ref2 += 4;
339
0
        pu1_ref3 += 4;
340
341
0
        pu1_src += u4_cur_buf_offset;
342
0
        pu1_ref1 += u4_ref_buf_offset;
343
0
        pu1_ref2 += u4_ref_buf_offset;
344
0
        pu1_ref3 += u4_ref_buf_offset;
345
0
    }
346
347
0
}
348
349
/**
350
*******************************************************************************
351
*
352
* @brief compute sad
353
*
354
* @par Description: This function computes the sad at vertices of diamond grid
355
* centered at reference pointer and at unit distance from it.
356
*
357
* @param[in] pu1_ref1, pu1_ref2
358
*  UWORD8 pointer to the reference
359
*
360
* @param[out] pu1_src
361
*  UWORD8 pointer to the source
362
*
363
* @param[in] ref_strd
364
*  integer reference stride
365
*
366
* @param[in] src_strd
367
*  integer source stride
368
*
369
* @param[out] pi4_sad
370
*  pointer to integer array evaluated sad
371
*
372
* @returns  sad at all evaluated vertexes
373
*
374
* @remarks  none
375
*
376
*******************************************************************************
377
*/
378
void ime_calculate_sad2_prog(UWORD8 *pu1_ref1,
379
                             UWORD8 *pu1_ref2,
380
                             UWORD8 *pu1_src,
381
                             WORD32 ref_strd,
382
                             WORD32 src_strd,
383
                             WORD32 *pi4_sad)
384
0
{
385
    /* temp var */
386
0
    WORD32 i;
387
0
    UWORD32 u4_ref_buf_offset = ref_strd - MB_SIZE;
388
0
    UWORD32 u4_cur_buf_offset = src_strd - MB_SIZE;
389
390
0
    for(i = 16; i > 0; i--)
391
0
    {
392
0
        USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
393
0
        USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
394
0
        pu1_src += 4;
395
0
        pu1_ref1 += 4;
396
0
        pu1_ref2 += 4;
397
398
0
        USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
399
0
        USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
400
0
        pu1_src += 4;
401
0
        pu1_ref1 += 4;
402
0
        pu1_ref2 += 4;
403
404
0
        USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
405
0
        USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
406
0
        pu1_src += 4;
407
0
        pu1_ref1 += 4;
408
0
        pu1_ref2 += 4;
409
410
0
        USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
411
0
        USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
412
0
        pu1_src += 4;
413
0
        pu1_ref1 += 4;
414
0
        pu1_ref2 += 4;
415
416
0
        pu1_src += u4_cur_buf_offset;
417
0
        pu1_ref1 += u4_ref_buf_offset;
418
0
        pu1_ref2 += u4_ref_buf_offset;
419
0
    }
420
421
0
}
422
423
/**
424
******************************************************************************
425
*
426
* @brief computes distortion (SAD) between 2 16x16 blocks
427
*
428
* @par   Description
429
*   This functions computes SAD between 2 16x16 blocks. There is a provision
430
*   for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
431
*   compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
432
*
433
* @param[in] pu1_src
434
*  UWORD8 pointer to the source
435
*
436
* @param[out] pu1_dst
437
*  UWORD8 pointer to the destination
438
*
439
* @param[in] src_strd
440
*  integer source stride
441
*
442
* @param[in] dst_strd
443
*  integer destination stride
444
*
445
* @param[in] i4_max_sad
446
*  integer maximum allowed distortion
447
*
448
* @param[out] pi4_mb_distortion
449
*  integer evaluated sad
450
*
451
* @remarks
452
*
453
******************************************************************************
454
*/
455
void ime_compute_sad_16x16(UWORD8 *pu1_src,
456
                           UWORD8 *pu1_est,
457
                           WORD32 src_strd,
458
                           WORD32 est_strd,
459
                           WORD32 i4_max_sad,
460
                           WORD32 *pi4_mb_distortion)
461
4.04M
{
462
4.04M
    WORD32 i4_sad = 0;
463
4.04M
    UWORD32 u4_src_offset = src_strd - 16;
464
4.04M
    UWORD32 u4_est_offset = est_strd - 16;
465
4.04M
    UWORD32 i;
466
467
4.04M
GATHER_16x16_SAD_EE_STATS(gu4_16x16_sad_ee_stats, 16);
468
469
62.0M
    for(i = 16; i > 0; i--)
470
58.9M
    {
471
58.9M
        USADA8(pu1_src, pu1_est, i4_sad);
472
58.9M
        pu1_src += 4;
473
58.9M
        pu1_est += 4;
474
475
58.9M
        USADA8(pu1_src, pu1_est, i4_sad);
476
58.9M
        pu1_src += 4;
477
58.9M
        pu1_est += 4;
478
479
58.9M
        USADA8(pu1_src, pu1_est, i4_sad);
480
58.9M
        pu1_src += 4;
481
58.9M
        pu1_est += 4;
482
483
58.9M
        USADA8(pu1_src, pu1_est, i4_sad);
484
58.9M
        pu1_src += 4;
485
58.9M
        pu1_est += 4;
486
487
        /* early exit */
488
58.9M
        if(i4_max_sad < i4_sad)
489
935k
        {
490
491
935k
GATHER_16x16_SAD_EE_STATS(gu4_16x16_sad_ee_stats, 16-i);
492
493
935k
            *pi4_mb_distortion = i4_sad;
494
935k
            return ;
495
935k
        }
496
58.0M
        pu1_src += u4_src_offset;
497
58.0M
        pu1_est += u4_est_offset;
498
58.0M
    }
499
500
3.10M
    *pi4_mb_distortion = i4_sad;
501
3.10M
    return ;
502
4.04M
}
503
504
/**
505
******************************************************************************
506
*
507
* @brief computes distortion (SAD) between 2 16x16 blocks (fast mode)
508
*
509
* @par   Description
510
*   This functions computes SAD between 2 16x16 blocks. There is a provision
511
*   for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
512
*   compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
513
*
514
* @param[in] pu1_src
515
*  UWORD8 pointer to the source
516
*
517
* @param[out] pu1_dst
518
*  UWORD8 pointer to the destination
519
*
520
* @param[in] src_strd
521
*  integer source stride
522
*
523
* @param[in] dst_strd
524
*  integer destination stride
525
*
526
* @param[in] i4_max_sad
527
*  integer maximum allowed distortion
528
*
529
* @param[out] pi4_mb_distortion
530
*  integer evaluated sad
531
*
532
* @remarks
533
*
534
******************************************************************************
535
*/
536
void ime_compute_sad_16x16_fast(UWORD8 *pu1_src,
537
                                UWORD8 *pu1_est,
538
                                WORD32 src_strd,
539
                                WORD32 est_strd,
540
                                WORD32 i4_max_sad,
541
                                WORD32 *pi4_mb_distortion)
542
0
{
543
0
    WORD32 i4_sad = 0;
544
0
    UWORD32 u4_src_offset = 2 * src_strd - 16;
545
0
    UWORD32 u4_est_offset = 2 * est_strd - 16;
546
0
    UWORD32 i;
547
548
0
    UNUSED(i4_max_sad);
549
550
0
    for(i = 16; i > 0; i-= 2)
551
0
    {
552
0
        USADA8(pu1_src, pu1_est, i4_sad);
553
0
        pu1_src += 4;
554
0
        pu1_est += 4;
555
556
0
        USADA8(pu1_src, pu1_est, i4_sad);
557
0
        pu1_src += 4;
558
0
        pu1_est += 4;
559
560
0
        USADA8(pu1_src, pu1_est, i4_sad);
561
0
        pu1_src += 4;
562
0
        pu1_est += 4;
563
564
0
        USADA8(pu1_src, pu1_est, i4_sad);
565
0
        pu1_src += 4;
566
0
        pu1_est += 4;
567
568
0
        pu1_src += u4_src_offset;
569
0
        pu1_est += u4_est_offset;
570
0
    }
571
572
0
    *pi4_mb_distortion = (i4_sad << 1);
573
0
    return ;
574
0
}
575
576
/**
577
******************************************************************************
578
*
579
*  @brief computes distortion (SAD) between 2 8x8 blocks
580
*
581
*  @par   Description
582
*   This functions computes SAD between 2 8x8 blocks. There is a provision
583
*   for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
584
*   compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
585
*
586
* @param[in] pu1_src
587
*  UWORD8 pointer to the source
588
*
589
* @param[out] pu1_dst
590
*  UWORD8 pointer to the destination
591
*
592
* @param[in] src_strd
593
*  integer source stride
594
*
595
* @param[in] dst_strd
596
*  integer destination stride
597
*
598
* @param[in] u4_max_sad
599
*  integer maximum allowed distortion
600
*
601
* @param[out] i4_sad
602
*  integer evaluated sad
603
*
604
* @remarks
605
*
606
******************************************************************************
607
 */
608
void ime_compute_sad_8x8(UWORD8 *pu1_src,
609
                         UWORD8 *pu1_est,
610
                         WORD32 src_strd,
611
                         WORD32 est_strd,
612
                         WORD32 i4_max_sad,
613
                         WORD32 *pi4_mb_distortion)
614
0
{
615
0
    WORD32 i4_sad = 0;
616
0
    UWORD32 u4_src_offset = src_strd - 8;
617
0
    UWORD32 u4_est_offset = est_strd - 8;
618
0
    UWORD32 i, j;
619
0
    WORD16 temp;
620
621
0
    for(i = 8; i > 0; i--)
622
0
    {
623
0
        for(j = 8; j > 0; j--)
624
0
        {
625
            /* SAD */
626
0
            temp = *pu1_src++ - *pu1_est++;
627
0
            i4_sad += ABS(temp);
628
0
        }
629
        /* early exit */
630
0
        if(i4_max_sad < i4_sad)
631
0
        {
632
0
            *pi4_mb_distortion = i4_sad;
633
0
            return;
634
0
        }
635
0
        pu1_src += u4_src_offset;
636
0
        pu1_est += u4_est_offset;
637
0
    }
638
0
    *pi4_mb_distortion = i4_sad;
639
0
}
640
641
/**
642
******************************************************************************
643
*
644
*  @brief computes distortion (SAD) between 2 4x4 blocks
645
*
646
*  @par   Description
647
*   This functions computes SAD between 2 4x4 blocks. There is a provision
648
*   for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
649
*   compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
650
*
651
* @param[in] pu1_src
652
*  UWORD8 pointer to the source
653
*
654
* @param[out] pu1_dst
655
*  UWORD8 pointer to the destination
656
*
657
* @param[in] src_strd
658
*  integer source stride
659
*
660
* @param[in] dst_strd
661
*  integer destination stride
662
*
663
* @param[in] u4_max_sad
664
*  integer maximum allowed distortion
665
*
666
* @param[out] pi4_mb_distortion
667
*  integer evaluated sad
668
*
669
* @remarks
670
*
671
******************************************************************************
672
*/
673
void ime_compute_sad_4x4(UWORD8 *pu1_src,
674
                         UWORD8 *pu1_est,
675
                         WORD32 src_strd,
676
                         WORD32 est_strd,
677
                         WORD32 i4_max_sad,
678
                         WORD32 *pi4_mb_distortion)
679
0
{
680
0
    WORD32 i4_sad = 0;
681
682
0
    UNUSED(i4_max_sad);
683
684
0
    USADA8(pu1_src, pu1_est, i4_sad);
685
0
    pu1_src += src_strd;
686
0
    pu1_est += est_strd;
687
688
0
    USADA8(pu1_src, pu1_est, i4_sad);
689
0
    pu1_src += src_strd;
690
0
    pu1_est += est_strd;
691
692
0
    USADA8(pu1_src, pu1_est, i4_sad);
693
0
    pu1_src += src_strd;
694
0
    pu1_est += est_strd;
695
696
0
    USADA8(pu1_src, pu1_est, i4_sad);
697
0
    *pi4_mb_distortion = i4_sad;
698
0
}
699
700
/**
701
******************************************************************************
702
*
703
*  @brief computes distortion (SAD) between 2 16x8  blocks
704
*
705
*  @par   Description
706
*   This functions computes SAD between 2 16x8 blocks. There is a provision
707
*   for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
708
*   compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
709
*
710
* @param[in] pu1_src
711
*  UWORD8 pointer to the source
712
*
713
* @param[out] pu1_dst
714
*  UWORD8 pointer to the destination
715
*
716
* @param[in] src_strd
717
*  integer source stride
718
*
719
* @param[in] dst_strd
720
*  integer destination stride
721
*
722
* @param[in] u4_max_sad
723
*  integer maximum allowed distortion
724
*
725
* @param[out] pi4_mb_distortion
726
*  integer evaluated sad
727
*
728
* @remarks
729
*
730
******************************************************************************
731
*/
732
void ime_compute_sad_16x8(UWORD8 *pu1_src,
733
                          UWORD8 *pu1_est,
734
                          WORD32 src_strd,
735
                          WORD32 est_strd,
736
                          WORD32 i4_max_sad,
737
                          WORD32 *pi4_mb_distortion)
738
2.24M
{
739
2.24M
    WORD32 i4_sad = 0;
740
2.24M
    UWORD32 u4_src_offset = src_strd - 16;
741
2.24M
    UWORD32 u4_est_offset = est_strd - 16;
742
2.24M
    UWORD32 i, j;
743
2.24M
    WORD16 temp;
744
745
2.24M
GATHER_16x8_SAD_EE_STATS(gu4_16x8_sad_ee_stats, 8);
746
747
19.3M
    for(i = 8; i > 0; i--)
748
17.1M
    {
749
292M
        for(j = 16; j > 0; j--)
750
274M
        {
751
            /* SAD */
752
274M
            temp = *pu1_src++ - *pu1_est++;
753
274M
            i4_sad += ABS(temp);
754
274M
        }
755
        /* early exit */
756
17.1M
        if(i4_max_sad < i4_sad)
757
134k
        {
758
759
134k
GATHER_16x8_SAD_EE_STATS(gu4_16x8_sad_ee_stats, 8-i);
760
761
134k
            *pi4_mb_distortion = i4_sad;
762
763
134k
            return;
764
134k
        }
765
17.0M
        pu1_src += u4_src_offset;
766
17.0M
        pu1_est += u4_est_offset;
767
17.0M
    }
768
769
2.10M
    *pi4_mb_distortion = i4_sad;
770
2.10M
    return;
771
772
2.24M
}
773
774
/**
775
******************************************************************************
776
*
777
* @brief computes distortion (SAD) between 2 16x16 blocks
778
*
779
* @par   Description
780
*   This functions computes SAD between 2 16x16 blocks. There is a provision
781
*   for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
782
*   compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
783
*
784
* @param[in] pu1_src
785
*  UWORD8 pointer to the source
786
*
787
* @param[out] pu1_dst
788
*  UWORD8 pointer to the destination
789
*
790
* @param[in] src_strd
791
*  integer source stride
792
*
793
* @param[in] dst_strd
794
*  integer destination stride
795
*
796
* @param[in] i4_max_sad
797
*  integer maximum allowed distortion
798
*
799
* @param[out] pi4_mb_distortion
800
*  integer evaluated sad
801
*
802
* @remarks
803
*
804
******************************************************************************
805
*/
806
void ime_compute_sad_16x16_ea8(UWORD8 *pu1_src,
807
                               UWORD8 *pu1_est,
808
                               WORD32 src_strd,
809
                               WORD32 est_strd,
810
                               WORD32 i4_max_sad,
811
                               WORD32 *pi4_mb_distortion)
812
0
{
813
0
    WORD32 i4_sad = 0;
814
0
    UWORD32 u4_src_offset = src_strd - 16;
815
0
    UWORD32 u4_est_offset = est_strd - 16;
816
0
    UWORD32 i, j;
817
0
    WORD16 temp;
818
0
    UWORD8 *pu1_src_temp = pu1_src + src_strd;
819
0
    UWORD8 *pu1_est_temp = pu1_est + est_strd;
820
821
0
    for(i = 16; i > 0; i -= 2)
822
0
    {
823
0
        for(j = 16; j > 0; j--)
824
0
        {
825
            /* SAD */
826
0
            temp = *pu1_src++ - *pu1_est++;
827
0
            i4_sad += ABS(temp);
828
0
        }
829
830
0
        pu1_src += (u4_src_offset + src_strd);
831
0
        pu1_est += (u4_est_offset + est_strd);
832
833
0
    }
834
835
    /* early exit */
836
0
    if(i4_max_sad < i4_sad)
837
0
    {
838
0
        *pi4_mb_distortion = i4_sad;
839
0
        return;
840
0
    }
841
842
0
    pu1_src = pu1_src_temp;
843
0
    pu1_est = pu1_est_temp;
844
845
0
    for(i = 16; i > 0; i -= 2)
846
0
    {
847
0
        for(j = 16; j > 0; j--)
848
0
        {
849
            /* SAD */
850
0
            temp = *pu1_src++ - *pu1_est++;
851
0
            i4_sad += ABS(temp);
852
0
        }
853
854
0
        pu1_src += u4_src_offset + src_strd;
855
0
        pu1_est += u4_est_offset + est_strd;
856
0
    }
857
858
0
    *pi4_mb_distortion = i4_sad;
859
0
    return;
860
0
}
861
862
/**
863
*******************************************************************************
864
*
865
* @brief This function computes SAD between two 16x16 blocks. It also computes
866
* if the block will be zero after H264 transform and quant
867
*
868
* @param[in] pu1_src
869
*  UWORD8 pointer to the source
870
*
871
* @param[out] pu1_est
872
*  UWORD8 pointer to the estimated block
873
*
874
* @param[in] i4_src_strd
875
*  source stride
876
*
877
* @param[in] i4_est_strd
878
*  est buffer stride
879
*
880
* @param[in] pu2_thrsh
881
*  Threshold for each element of transformed quantized block
882
*
883
* @param[out] pi4_mb_distortion
884
*  evaluated sad
885
*
886
* @param[out] pu4_is_zero
887
*  Pointer to store if the block is zero after transform and quantization
888
*
889
* @remarks
890
*
891
******************************************************************************
892
*/
893
void ime_compute_satqd_16x16_lumainter(UWORD8 *pu1_src,
894
                                       UWORD8 *pu1_est,
895
                                       WORD32 i4_src_strd,
896
                                       WORD32 i4_est_strd,
897
                                       UWORD16 *pu2_thrsh,
898
                                       WORD32 *pi4_mb_distortion,
899
                                       UWORD32 *pu4_is_non_zero)
900
412k
{
901
412k
    WORD32 i, j;
902
412k
    WORD16 s1, s2, s3, s4;
903
412k
    WORD16 sad_1, sad_2;
904
412k
    WORD16 ls1, ls2, ls3, ls4, ls5, ls6, ls7, ls8;
905
412k
    UWORD8 *pu1_src_lp, *pu1_est_lp;
906
412k
    UWORD32 sad = 0;
907
908
412k
    (*pi4_mb_distortion) = 0;
909
910
2.06M
    for (i = 0; i < 4; i++)
911
1.65M
    {
912
8.21M
        for (j = 0; j < 4; j++)
913
6.56M
        {
914
6.56M
            pu1_src_lp = pu1_src + 4 * j;
915
6.56M
            pu1_est_lp = pu1_est + 4 * j;
916
917
6.56M
            s1 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0]) + ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
918
6.56M
            s4 = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1]) + ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
919
920
6.56M
            pu1_src_lp += i4_src_strd;
921
6.56M
            pu1_est_lp += i4_est_strd;
922
923
6.56M
            s2 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0]) + ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
924
6.56M
            s3 = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1]) + ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
925
926
6.56M
            pu1_src_lp += i4_src_strd;
927
6.56M
            pu1_est_lp += i4_est_strd;
928
929
6.56M
            s2 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0]) + ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
930
6.56M
            s3 += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1]) + ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
931
932
6.56M
            pu1_src_lp += i4_src_strd;
933
6.56M
            pu1_est_lp += i4_est_strd;
934
935
6.56M
            s1 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0]) + ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
936
6.56M
            s4 += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1]) + ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
937
938
6.56M
            sad_1 = s1 + s2 + s3 + s4;
939
940
6.56M
            if (sad == 0)
941
1.96M
            {
942
1.96M
                sad_2 = sad_1 << 1;
943
944
1.96M
                ls1 = sad_2 - (s2 + s3);
945
1.96M
                ls2 = sad_2 - (s1 + s4);
946
1.96M
                ls3 = sad_2 - (s3 + s4);
947
1.96M
                ls4 = sad_2 - (s3 - (s1 << 1));
948
1.96M
                ls5 = sad_2 - (s4 - (s2 << 1));
949
1.96M
                ls6 = sad_2 - (s1 + s2);
950
1.96M
                ls7 = sad_2 - (s2 - (s4 << 1));
951
1.96M
                ls8 = sad_2 - (s1 - (s3 << 1));
952
953
1.96M
                if (pu2_thrsh[8] <= sad_1 ||
954
1.96M
                    pu2_thrsh[0] <= ls2 || pu2_thrsh[1] <= ls1 ||
955
1.96M
                    pu2_thrsh[2] <= ls8 || pu2_thrsh[3] <= ls5 ||
956
1.96M
                    pu2_thrsh[4] <= ls6 || pu2_thrsh[5] <= ls3 ||
957
1.96M
                    pu2_thrsh[6] <= ls7 || pu2_thrsh[7] <= ls4) {
958
331k
                  sad = 1;
959
331k
                }
960
1.96M
            }
961
6.56M
            (*pi4_mb_distortion) += sad_1;
962
6.56M
        }
963
1.65M
        pu1_src +=  (i4_src_strd * 4);
964
1.65M
        pu1_est +=  (i4_est_strd * 4);
965
1.65M
    }
966
412k
    *pu4_is_non_zero = sad;
967
412k
}
968
969
/**
970
******************************************************************************
971
*
972
* @brief computes distortion (SAD and SAQTD) between 2 16x8 (interleaved) chroma
973
*  blocks
974
*
975
* @par Description
976
*  This function computes SAD between 2 16x8 chroma blocks (interleaved).  It
977
*  also checks if the SATQD, Sum of absolute transformed quantized difference
978
*  between the blocks. If SAQTD is zero, it gives back zero. Otherwise sad is
979
*  returned. There is no provision for early exit. The transform done here is
980
*  the transform for chroma blocks in H264
981
*
982
* @param[in] pu1_src
983
*  UWORD8 pointer to the source
984
*
985
* @param[out] pu1_dst
986
*  UWORD8 pointer to the destination
987
*
988
* @param[in] src_strd
989
*  integer source stride
990
*
991
* @param[in] dst_strd
992
*  integer destination stride
993
*
994
* @param[in] pu2_thrsh
995
*  Threshold for each element of transformed quantized block
996
*
997
* @param[out] pi4_mb_distortion
998
*  integer evaluated sad
999
*
1000
* @remarks
1001
*
1002
******************************************************************************
1003
*/
1004
void ime_compute_satqd_8x16_chroma(UWORD8 *pu1_src,
1005
                                   UWORD8 *pu1_est,
1006
                                   WORD32 src_strd,
1007
                                   WORD32 est_strd,
1008
                                   WORD32 max_sad,
1009
                                   UWORD16 *thrsh)
1010
0
{
1011
0
    WORD32 i, j, plane;
1012
0
    WORD16 s1, s2, s3, s4;
1013
0
    WORD16 sad_1, sad_2;
1014
0
    WORD16 ls1, ls2, ls3, ls4, ls5, ls6, ls7, ls8;
1015
0
    UWORD8 *pu1_src_lp, *pu1_est_lp, *pu1_src_plane, *pu1_est_plane;
1016
0
    WORD32 sad = 0;
1017
1018
0
    UNUSED(max_sad);
1019
0
    pu1_src_plane = pu1_src;
1020
0
    pu1_est_plane = pu1_est;
1021
1022
0
    for (plane = 0; plane < 2; plane++)
1023
0
    {
1024
0
        for (i = 0; i < 4; i++)
1025
0
        {
1026
0
            for (j = 0; j < 4; j++)
1027
0
            {
1028
0
                pu1_src_lp = pu1_src + 8 * j;
1029
0
                pu1_est_lp = pu1_est + 8 * j;
1030
1031
0
                s1 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0]) + ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
1032
0
                s4 = ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]) + ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
1033
1034
0
                pu1_src_lp += src_strd;
1035
0
                pu1_est_lp += est_strd;
1036
1037
0
                s2 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0]) + ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
1038
0
                s3 = ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]) + ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
1039
1040
0
                pu1_src_lp += src_strd;
1041
0
                pu1_est_lp += est_strd;
1042
1043
0
                s2 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0]) + ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
1044
0
                s3 += ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]) + ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
1045
1046
0
                pu1_src_lp += src_strd;
1047
0
                pu1_est_lp += est_strd;
1048
1049
0
                s1 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0]) + ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
1050
0
                s4 += ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]) + ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
1051
1052
0
                sad_1 = s1 + s2 + s3 + s4;
1053
0
                sad_2 = sad_1 << 1;
1054
1055
0
                ls1 = sad_2 - (s2 + s3);
1056
0
                ls2 = sad_2 - (s1 + s4);
1057
0
                ls3 = sad_2 - (s3 + s4);
1058
0
                ls4 = sad_2 - (s3 - (s1 << 1));
1059
0
                ls5 = sad_2 - (s4 - (s2 << 1));
1060
0
                ls6 = sad_2 - (s1 + s2);
1061
0
                ls7 = sad_2 - (s2 - (s4 << 1));
1062
0
                ls8 = sad_2 - (s1 - (s3 << 1));
1063
1064
0
                if (thrsh[1] > ls1 && thrsh[2] > sad_1 && thrsh[3] > ls2 &&
1065
0
                    thrsh[4] > ls3 && thrsh[5] > ls4 && thrsh[6] > ls3 && thrsh[7] > ls5 &&
1066
0
                    thrsh[8] > sad_1 && thrsh[9] > ls1 && thrsh[10] > sad_1 && thrsh[11] > ls2 &&
1067
0
                    thrsh[12] > ls6 && thrsh[13] > ls7 && thrsh[14] > ls6 && thrsh[15] > ls8)
1068
0
                {
1069
                    /*set current sad to be zero*/
1070
0
                }
1071
0
                else
1072
0
                    return ;
1073
1074
0
                sad += sad_1;
1075
0
            }
1076
0
            pu1_src +=  (src_strd *4);
1077
0
            pu1_est +=  (est_strd *4);
1078
0
        }
1079
0
        if (sad < (thrsh[0] << 1))
1080
0
            sad = 0;
1081
0
        else
1082
0
            return;
1083
1084
0
        pu1_src = pu1_src_plane + 1;
1085
0
        pu1_est = pu1_est_plane + 1;
1086
0
    }
1087
0
    return ;
1088
0
}
1089
1090
/**
1091
******************************************************************************
1092
*
1093
* @brief computes distortion (SAD and SAQTD) between 2 16x16 blocks
1094
*
1095
* @par   Description
1096
*  This functions computes SAD between2 16x8 chroma blocks(interleaved).  It
1097
*  also checks if the SATQD, Sum of absolute transformed quantized difference
1098
*  between the blocks. If SAQTD is zero, it gives back zero Other wise sad is
1099
*  returned. There is no provison for early exit. The transform done here is the
1100
*  transform for intra 16x16 blocks in H264
1101
*
1102
* @param[in] pu1_src
1103
*  UWORD8 pointer to the source
1104
*
1105
* @param[out] pu1_dst
1106
*  UWORD8 pointer to the destination
1107
*
1108
* @param[in] src_strd
1109
*  integer source stride
1110
*
1111
* @param[in] dst_strd
1112
*  integer destination stride
1113
*
1114
* @param[in] pu2_thrsh
1115
*  Threshold for each element of transofrmed quantized block
1116
*
1117
* @param[out] pi4_mb_distortion
1118
*  integer evaluated sad
1119
*
1120
* @remarks
1121
*
1122
******************************************************************************
1123
*/
1124
void ime_compute_satqd_16x16_lumaintra(UWORD8 *pu1_src,
1125
                                       UWORD8 *pu1_est,
1126
                                       WORD32 src_strd,
1127
                                       WORD32 est_strd,
1128
                                       WORD32 max_sad,
1129
                                       UWORD16 *thrsh,
1130
                                       WORD32 *pi4_mb_distortion,
1131
                                       UWORD8 *sig_nz_sad)
1132
0
{
1133
0
    UWORD32 i, j;
1134
0
    WORD16 s1[4], s2[4], s3[4], s4[4], sad[4];
1135
0
    UWORD8 *pu1_src_lp, *pu1_est_lp;
1136
0
    UWORD8 *sig_sad_dc;
1137
0
    UWORD32 nz_sad_sig = 0;
1138
1139
0
    UNUSED(max_sad);
1140
0
    *pi4_mb_distortion = 0;
1141
0
    sig_sad_dc = sig_nz_sad;
1142
0
    sig_nz_sad++;
1143
1144
0
    for (i = 0; i < 4; i++)
1145
0
    {
1146
0
        for (j = 0; j < 4; j++)
1147
0
        {
1148
0
            pu1_src_lp = pu1_src + 4 * j;
1149
0
            pu1_est_lp = pu1_est + 4 * j;
1150
1151
0
            s1[j] = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0]) + ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
1152
0
            s4[j] = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1]) + ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
1153
1154
0
            pu1_src_lp += src_strd;
1155
0
            pu1_est_lp += est_strd;
1156
1157
0
            s2[j] = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0]) + ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
1158
0
            s3[j] = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1]) + ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
1159
1160
0
            pu1_src_lp += src_strd;
1161
0
            pu1_est_lp += est_strd;
1162
1163
0
            s2[j] += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0]) + ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
1164
0
            s3[j] += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1]) + ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
1165
1166
0
            pu1_src_lp += src_strd;
1167
0
            pu1_est_lp += est_strd;
1168
1169
0
            s1[j] += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0]) + ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
1170
0
            s4[j] += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1]) + ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
1171
1172
0
            sad[j] = ((s1[j] + s2[j] + s3[j] + s4[j]) << 1);
1173
0
        }
1174
1175
0
        for (j = 0; j < 4; j++)
1176
0
        {
1177
1178
0
            if (thrsh[1] > (sad[j] - (s2[j] + s3[j])) && thrsh[2] > (sad[j] >> 1)
1179
0
                && thrsh[3] > (sad[j] - (s1[j] + s4[j])) &&
1180
1181
0
                thrsh[4] > (sad[j] - (s3[j] + s4[j]))
1182
0
                && thrsh[5] > (sad[j] - (s3[j] - (s1[j] << 1)))
1183
0
                && thrsh[6] > (sad[j] - (s3[j] + s4[j]))
1184
0
                && thrsh[7] > (sad[j] - (s4[j] - (s2[j] << 1))) &&
1185
1186
0
                thrsh[8] > (sad[j] >> 1)
1187
0
                && thrsh[9] > (sad[j] - (s2[j] + s3[j]))
1188
0
                && thrsh[10] > (sad[j] >> 1)
1189
0
                && thrsh[11] > (sad[j] - (s1[j] + s4[j])) &&
1190
1191
0
                thrsh[12] > (sad[j] - (s1[j] + s2[j]))
1192
0
                && thrsh[13] > (sad[j] - (s2[j] - (s4[j] << 1)))
1193
0
                && thrsh[14] > (sad[j] - (s1[j] + s2[j]))
1194
0
                && thrsh[15] > (sad[j] - (s1[j] - (s3[j] << 1))))
1195
0
            {
1196
                //sad[j] = 0;   /*set current sad to be zero*/
1197
0
                sig_nz_sad[j] = 0;/*Signal that the sad is zero*/
1198
0
            }
1199
0
            else
1200
0
            {
1201
0
                sig_nz_sad[j] = 1;/*signal that sad is non zero*/
1202
0
                nz_sad_sig = 1;
1203
0
            }
1204
1205
0
            (*pi4_mb_distortion) += (sad[j] >> 1);
1206
            //if ((*pi4_mb_distortion) >= max_sad)return; /*return or some thing*/
1207
0
        }
1208
1209
0
        sig_nz_sad += 4;
1210
0
        pu1_src += (src_strd * 4);
1211
0
        pu1_est += (est_strd * 4);
1212
0
    }
1213
1214
0
    if ((*pi4_mb_distortion) < thrsh[0] << 2)
1215
0
    {
1216
0
        *sig_sad_dc = 0;
1217
0
        if (nz_sad_sig == 0)
1218
0
            (*pi4_mb_distortion) = 0;
1219
0
    }
1220
0
    else
1221
0
    {
1222
0
        *sig_sad_dc = 1;
1223
0
    }
1224
0
}
1225
1226