Coverage Report

Created: 2025-08-11 08:01

/src/openh264/codec/processing/src/vaacalc/vaacalcfuncs.cpp
Line
Count
Source (jump to first uncovered line)
1
/*!
2
 * \copy
3
 *     Copyright (c)  2013, Cisco Systems
4
 *     All rights reserved.
5
 *
6
 *     Redistribution and use in source and binary forms, with or without
7
 *     modification, are permitted provided that the following conditions
8
 *     are met:
9
 *
10
 *        * Redistributions of source code must retain the above copyright
11
 *          notice, this list of conditions and the following disclaimer.
12
 *
13
 *        * Redistributions in binary form must reproduce the above copyright
14
 *          notice, this list of conditions and the following disclaimer in
15
 *          the documentation and/or other materials provided with the
16
 *          distribution.
17
 *
18
 *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
 *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
 *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21
 *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22
 *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23
 *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24
 *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25
 *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
 *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27
 *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28
 *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29
 *     POSSIBILITY OF SUCH DAMAGE.
30
 *
31
 */
32
33
#include "util.h"
34
35
WELSVP_NAMESPACE_BEGIN
36
37
/*!
 * \brief   Gather the statistics of one 8x8 block for VAACalcSadSsd_c.
 *
 * \param   pCur      top-left pixel of the 8x8 block in the current picture
 * \param   pRef      top-left pixel of the co-located 8x8 block in the reference picture
 * \param   iStride   distance in bytes between two consecutive rows (same for both pictures)
 * \param   pSad      [out] sum of |cur - ref| over the block
 * \param   pSum      [out] sum of the current pixels over the block
 * \param   pSqSum    [out] sum of the squared current pixels over the block
 * \param   pSqDiff   [out] sum of (cur - ref)^2 over the block
 */
static inline void VAACalcSadSsdBlock8x8_c (const uint8_t* pCur, const uint8_t* pRef, int32_t iStride,
    int32_t* pSad, int32_t* pSum, int32_t* pSqSum, int32_t* pSqDiff) {
  int32_t iSad = 0;
  int32_t iSum = 0;
  int32_t iSqSum = 0;
  int32_t iSqDiff = 0;

  for (int32_t iRow = 0; iRow < 8; ++iRow) {
    for (int32_t iCol = 0; iCol < 8; ++iCol) {
      const int32_t kiCurPix  = pCur[iCol];
      const int32_t kiDiff    = kiCurPix - pRef[iCol];
      const int32_t kiAbsDiff = kiDiff < 0 ? -kiDiff : kiDiff;

      iSad    += kiAbsDiff;
      iSqDiff += kiAbsDiff * kiAbsDiff;
      iSum    += kiCurPix;
      iSqSum  += kiCurPix * kiCurPix;
    }
    pCur += iStride;
    pRef += iStride;
  }

  *pSad    = iSad;
  *pSum    = iSum;
  *pSqSum  = iSqSum;
  *pSqDiff = iSqDiff;
}

/*!
 * \brief   Per-macroblock SAD / sum / squared-sum / squared-difference statistics between two pictures.
 *
 * The picture is walked in 16x16 macroblocks in raster order; only the (iPicWidth >> 4) x (iPicHeight >> 4)
 * complete macroblocks are visited. Each macroblock is split into four 8x8 blocks, ordered
 * top-left, top-right, bottom-left, bottom-right.
 *
 * \param   pCurData      current picture
 * \param   pRefData      reference picture
 * \param   iPicWidth     picture width in pixels
 * \param   iPicHeight    picture height in pixels
 * \param   iPicStride    row stride in bytes, shared by both pictures
 * \param   pFrameSad     [out] SAD summed over all visited 8x8 blocks
 * \param   pSad8x8       [out] four SAD entries per macroblock (index = mb * 4 + block)
 * \param   pSum16x16     [out] one entry per macroblock: sum of current pixels
 * \param   psqsum16x16   [out] one entry per macroblock: sum of squared current pixels
 * \param   psqdiff16x16  [out] one entry per macroblock: sum of squared differences
 *
 * \note    The start of every macroblock row is derived from the picture origin, so a width that is not
 *          a multiple of 16 simply leaves the trailing columns unvisited instead of shifting the
 *          following macroblock rows.
 */
void VAACalcSadSsd_c (const uint8_t* pCurData, const uint8_t* pRefData, int32_t iPicWidth, int32_t iPicHeight,
                      int32_t iPicStride,
                      int32_t* pFrameSad, int32_t* pSad8x8, int32_t* pSum16x16, int32_t* psqsum16x16, int32_t* psqdiff16x16) {
  const int32_t kiMbWidth   = iPicWidth >> 4;
  const int32_t kiMbHeight  = iPicHeight >> 4;
  const int32_t kiStrideX8  = iPicStride * 8;
  const int32_t kiStrideX16 = iPicStride * 16;
  int32_t iMbIdx = 0;

  *pFrameSad = 0;
  for (int32_t iMbY = 0; iMbY < kiMbHeight; ++iMbY) {
    const uint8_t* pCurMbRow = pCurData + iMbY * kiStrideX16;
    const uint8_t* pRefMbRow = pRefData + iMbY * kiStrideX16;

    for (int32_t iMbX = 0; iMbX < kiMbWidth; ++iMbX) {
      const uint8_t* pCurMb = pCurMbRow + (iMbX << 4);
      const uint8_t* pRefMb = pRefMbRow + (iMbX << 4);
      int32_t iMbSum = 0;
      int32_t iMbSqSum = 0;
      int32_t iMbSqDiff = 0;

      for (int32_t iBlk = 0; iBlk < 4; ++iBlk) {
        // bit 1 of iBlk selects the lower half, bit 0 the right half of the macroblock
        const int32_t kiOffset = (iBlk >> 1) * kiStrideX8 + (iBlk & 1) * 8;
        int32_t iSad, iSum, iSqSum, iSqDiff;

        VAACalcSadSsdBlock8x8_c (pCurMb + kiOffset, pRefMb + kiOffset, iPicStride, &iSad, &iSum, &iSqSum, &iSqDiff);

        *pFrameSad += iSad;
        pSad8x8[ (iMbIdx << 2) + iBlk] = iSad;
        iMbSum    += iSum;
        iMbSqSum  += iSqSum;
        iMbSqDiff += iSqDiff;
      }

      pSum16x16[iMbIdx]    = iMbSum;
      psqsum16x16[iMbIdx]  = iMbSqSum;
      psqdiff16x16[iMbIdx] = iMbSqDiff;
      ++iMbIdx;
    }
  }
}
149
/*!
 * \brief   Gather the statistics of one 8x8 block for VAACalcSadVar_c.
 *
 * \param   pCur      top-left pixel of the 8x8 block in the current picture
 * \param   pRef      top-left pixel of the co-located 8x8 block in the reference picture
 * \param   iStride   distance in bytes between two consecutive rows (same for both pictures)
 * \param   pSad      [out] sum of |cur - ref| over the block
 * \param   pSum      [out] sum of the current pixels over the block
 * \param   pSqSum    [out] sum of the squared current pixels over the block
 */
static inline void VAACalcSadVarBlock8x8_c (const uint8_t* pCur, const uint8_t* pRef, int32_t iStride,
    int32_t* pSad, int32_t* pSum, int32_t* pSqSum) {
  int32_t iSad = 0;
  int32_t iSum = 0;
  int32_t iSqSum = 0;

  for (int32_t iRow = 0; iRow < 8; ++iRow) {
    for (int32_t iCol = 0; iCol < 8; ++iCol) {
      const int32_t kiCurPix = pCur[iCol];
      const int32_t kiDiff   = kiCurPix - pRef[iCol];

      iSad   += kiDiff < 0 ? -kiDiff : kiDiff;
      iSum   += kiCurPix;
      iSqSum += kiCurPix * kiCurPix;
    }
    pCur += iStride;
    pRef += iStride;
  }

  *pSad   = iSad;
  *pSum   = iSum;
  *pSqSum = iSqSum;
}

/*!
 * \brief   Per-macroblock SAD / sum / squared-sum statistics between two pictures.
 *
 * The picture is walked in 16x16 macroblocks in raster order; only the (iPicWidth >> 4) x (iPicHeight >> 4)
 * complete macroblocks are visited. Each macroblock is split into four 8x8 blocks, ordered
 * top-left, top-right, bottom-left, bottom-right.
 *
 * \param   pCurData      current picture
 * \param   pRefData      reference picture
 * \param   iPicWidth     picture width in pixels
 * \param   iPicHeight    picture height in pixels
 * \param   iPicStride    row stride in bytes, shared by both pictures
 * \param   pFrameSad     [out] SAD summed over all visited 8x8 blocks
 * \param   pSad8x8       [out] four SAD entries per macroblock (index = mb * 4 + block)
 * \param   pSum16x16     [out] one entry per macroblock: sum of current pixels
 * \param   psqsum16x16   [out] one entry per macroblock: sum of squared current pixels
 *
 * \note    The start of every macroblock row is derived from the picture origin, so a width that is not
 *          a multiple of 16 simply leaves the trailing columns unvisited instead of shifting the
 *          following macroblock rows.
 */
void VAACalcSadVar_c (const uint8_t* pCurData, const uint8_t* pRefData, int32_t iPicWidth, int32_t iPicHeight,
                      int32_t iPicStride,
                      int32_t* pFrameSad, int32_t* pSad8x8, int32_t* pSum16x16, int32_t* psqsum16x16) {
  const int32_t kiMbWidth   = iPicWidth >> 4;
  const int32_t kiMbHeight  = iPicHeight >> 4;
  const int32_t kiStrideX8  = iPicStride * 8;
  const int32_t kiStrideX16 = iPicStride * 16;
  int32_t iMbIdx = 0;

  *pFrameSad = 0;
  for (int32_t iMbY = 0; iMbY < kiMbHeight; ++iMbY) {
    const uint8_t* pCurMbRow = pCurData + iMbY * kiStrideX16;
    const uint8_t* pRefMbRow = pRefData + iMbY * kiStrideX16;

    for (int32_t iMbX = 0; iMbX < kiMbWidth; ++iMbX) {
      const uint8_t* pCurMb = pCurMbRow + (iMbX << 4);
      const uint8_t* pRefMb = pRefMbRow + (iMbX << 4);
      int32_t iMbSum = 0;
      int32_t iMbSqSum = 0;

      for (int32_t iBlk = 0; iBlk < 4; ++iBlk) {
        // bit 1 of iBlk selects the lower half, bit 0 the right half of the macroblock
        const int32_t kiOffset = (iBlk >> 1) * kiStrideX8 + (iBlk & 1) * 8;
        int32_t iSad, iSum, iSqSum;

        VAACalcSadVarBlock8x8_c (pCurMb + kiOffset, pRefMb + kiOffset, iPicStride, &iSad, &iSum, &iSqSum);

        *pFrameSad += iSad;
        pSad8x8[ (iMbIdx << 2) + iBlk] = iSad;
        iMbSum   += iSum;
        iMbSqSum += iSqSum;
      }

      pSum16x16[iMbIdx]   = iMbSum;
      psqsum16x16[iMbIdx] = iMbSqSum;
      ++iMbIdx;
    }
  }
}
252
253
254
/*!
 * \brief   Sum of absolute differences over one 8x8 block, used by VAACalcSad_c.
 *
 * \param   pCur      top-left pixel of the 8x8 block in the current picture
 * \param   pRef      top-left pixel of the co-located 8x8 block in the reference picture
 * \param   iStride   distance in bytes between two consecutive rows (same for both pictures)
 * \return  sum of |cur - ref| over the 64 pixels
 */
static inline int32_t VAACalcSadBlock8x8_c (const uint8_t* pCur, const uint8_t* pRef, int32_t iStride) {
  int32_t iSad = 0;

  for (int32_t iRow = 0; iRow < 8; ++iRow) {
    for (int32_t iCol = 0; iCol < 8; ++iCol) {
      const int32_t kiDiff = pCur[iCol] - pRef[iCol];
      iSad += kiDiff < 0 ? -kiDiff : kiDiff;
    }
    pCur += iStride;
    pRef += iStride;
  }
  return iSad;
}

/*!
 * \brief   Per-8x8-block and whole-frame SAD between two pictures.
 *
 * The picture is walked in 16x16 macroblocks in raster order; only the (iPicWidth >> 4) x (iPicHeight >> 4)
 * complete macroblocks are visited. Each macroblock is split into four 8x8 blocks, ordered
 * top-left, top-right, bottom-left, bottom-right.
 *
 * \param   pCurData      current picture
 * \param   pRefData      reference picture
 * \param   iPicWidth     picture width in pixels
 * \param   iPicHeight    picture height in pixels
 * \param   iPicStride    row stride in bytes, shared by both pictures
 * \param   pFrameSad     [out] SAD summed over all visited 8x8 blocks
 * \param   pSad8x8       [out] four SAD entries per macroblock (index = mb * 4 + block)
 *
 * \note    The start of every macroblock row is derived from the picture origin, so a width that is not
 *          a multiple of 16 simply leaves the trailing columns unvisited instead of shifting the
 *          following macroblock rows.
 */
void VAACalcSad_c (const uint8_t* pCurData, const uint8_t* pRefData, int32_t iPicWidth, int32_t iPicHeight,
                   int32_t iPicStride,
                   int32_t* pFrameSad, int32_t* pSad8x8) {
  const int32_t kiMbWidth   = iPicWidth >> 4;
  const int32_t kiMbHeight  = iPicHeight >> 4;
  const int32_t kiStrideX8  = iPicStride * 8;
  const int32_t kiStrideX16 = iPicStride * 16;
  int32_t iMbIdx = 0;

  *pFrameSad = 0;
  for (int32_t iMbY = 0; iMbY < kiMbHeight; ++iMbY) {
    const uint8_t* pCurMbRow = pCurData + iMbY * kiStrideX16;
    const uint8_t* pRefMbRow = pRefData + iMbY * kiStrideX16;

    for (int32_t iMbX = 0; iMbX < kiMbWidth; ++iMbX) {
      const uint8_t* pCurMb = pCurMbRow + (iMbX << 4);
      const uint8_t* pRefMb = pRefMbRow + (iMbX << 4);

      for (int32_t iBlk = 0; iBlk < 4; ++iBlk) {
        // bit 1 of iBlk selects the lower half, bit 0 the right half of the macroblock
        const int32_t kiOffset = (iBlk >> 1) * kiStrideX8 + (iBlk & 1) * 8;
        const int32_t kiSad    = VAACalcSadBlock8x8_c (pCurMb + kiOffset, pRefMb + kiOffset, iPicStride);

        *pFrameSad += kiSad;
        pSad8x8[ (iMbIdx << 2) + iBlk] = kiSad;
      }
      ++iMbIdx;
    }
  }
}
337
338
/*!
 * \brief   Gather the statistics of one 8x8 block for VAACalcSadSsdBgd_c.
 *
 * \param   pCur      top-left pixel of the 8x8 block in the current picture
 * \param   pRef      top-left pixel of the co-located 8x8 block in the reference picture
 * \param   iStride   distance in bytes between two consecutive rows (same for both pictures)
 * \param   pSad      [out] sum of |cur - ref| over the block
 * \param   pSum      [out] sum of the current pixels over the block
 * \param   pSqSum    [out] sum of the squared current pixels over the block
 * \param   pSqDiff   [out] sum of (cur - ref)^2 over the block
 * \param   pSd       [out] signed sum of (cur - ref) over the block
 * \param   pMad      [out] largest |cur - ref| found in the block
 */
static inline void VAACalcSadSsdBgdBlock8x8_c (const uint8_t* pCur, const uint8_t* pRef, int32_t iStride,
    int32_t* pSad, int32_t* pSum, int32_t* pSqSum, int32_t* pSqDiff, int32_t* pSd, int32_t* pMad) {
  int32_t iSad = 0;
  int32_t iSum = 0;
  int32_t iSqSum = 0;
  int32_t iSqDiff = 0;
  int32_t iSd = 0;
  int32_t iMad = 0;

  for (int32_t iRow = 0; iRow < 8; ++iRow) {
    for (int32_t iCol = 0; iCol < 8; ++iCol) {
      const int32_t kiCurPix  = pCur[iCol];
      const int32_t kiDiff    = kiCurPix - pRef[iCol];
      const int32_t kiAbsDiff = kiDiff < 0 ? -kiDiff : kiDiff;

      iSd += kiDiff;
      if (kiAbsDiff > iMad) {
        iMad = kiAbsDiff;
      }
      iSad    += kiAbsDiff;
      iSqDiff += kiAbsDiff * kiAbsDiff;
      iSum    += kiCurPix;
      iSqSum  += kiCurPix * kiCurPix;
    }
    pCur += iStride;
    pRef += iStride;
  }

  *pSad    = iSad;
  *pSum    = iSum;
  *pSqSum  = iSqSum;
  *pSqDiff = iSqDiff;
  *pSd     = iSd;
  *pMad    = iMad;
}

/*!
 * \brief   Per-macroblock SAD / sum / squared-sum / squared-difference statistics plus per-8x8
 *          signed difference sum and maximum absolute difference between two pictures.
 *
 * The picture is walked in 16x16 macroblocks in raster order; only the (iPicWidth >> 4) x (iPicHeight >> 4)
 * complete macroblocks are visited. Each macroblock is split into four 8x8 blocks, ordered
 * top-left, top-right, bottom-left, bottom-right.
 *
 * \param   pCurData      current picture
 * \param   pRefData      reference picture
 * \param   iPicWidth     picture width in pixels
 * \param   iPicHeight    picture height in pixels
 * \param   iPicStride    row stride in bytes, shared by both pictures
 * \param   pFrameSad     [out] SAD summed over all visited 8x8 blocks
 * \param   pSad8x8       [out] four SAD entries per macroblock (index = mb * 4 + block)
 * \param   pSum16x16     [out] one entry per macroblock: sum of current pixels
 * \param   psqsum16x16   [out] one entry per macroblock: sum of squared current pixels
 * \param   psqdiff16x16  [out] one entry per macroblock: sum of squared differences
 * \param   pSd8x8        [out] four entries per macroblock: signed sum of (cur - ref)
 * \param   pMad8x8       [out] four entries per macroblock: maximum |cur - ref|
 *
 * \note    The start of every macroblock row is derived from the picture origin, so a width that is not
 *          a multiple of 16 simply leaves the trailing columns unvisited instead of shifting the
 *          following macroblock rows.
 */
void VAACalcSadSsdBgd_c (const uint8_t* pCurData, const uint8_t* pRefData, int32_t iPicWidth, int32_t iPicHeight,
                         int32_t iPicStride,
                         int32_t* pFrameSad, int32_t* pSad8x8, int32_t* pSum16x16, int32_t* psqsum16x16, int32_t* psqdiff16x16, int32_t* pSd8x8,
                         uint8_t* pMad8x8)

{
  const int32_t kiMbWidth   = iPicWidth >> 4;
  const int32_t kiMbHeight  = iPicHeight >> 4;
  const int32_t kiStrideX8  = iPicStride * 8;
  const int32_t kiStrideX16 = iPicStride * 16;
  int32_t iMbIdx = 0;

  *pFrameSad = 0;
  for (int32_t iMbY = 0; iMbY < kiMbHeight; ++iMbY) {
    const uint8_t* pCurMbRow = pCurData + iMbY * kiStrideX16;
    const uint8_t* pRefMbRow = pRefData + iMbY * kiStrideX16;

    for (int32_t iMbX = 0; iMbX < kiMbWidth; ++iMbX) {
      const uint8_t* pCurMb = pCurMbRow + (iMbX << 4);
      const uint8_t* pRefMb = pRefMbRow + (iMbX << 4);
      int32_t iMbSum = 0;
      int32_t iMbSqSum = 0;
      int32_t iMbSqDiff = 0;

      for (int32_t iBlk = 0; iBlk < 4; ++iBlk) {
        // bit 1 of iBlk selects the lower half, bit 0 the right half of the macroblock
        const int32_t kiOffset = (iBlk >> 1) * kiStrideX8 + (iBlk & 1) * 8;
        const int32_t kiBlkIdx = (iMbIdx << 2) + iBlk;
        int32_t iSad, iSum, iSqSum, iSqDiff, iSd, iMad;

        VAACalcSadSsdBgdBlock8x8_c (pCurMb + kiOffset, pRefMb + kiOffset, iPicStride,
                                    &iSad, &iSum, &iSqSum, &iSqDiff, &iSd, &iMad);

        *pFrameSad += iSad;
        pSad8x8[kiBlkIdx] = iSad;
        pSd8x8[kiBlkIdx]  = iSd;
        pMad8x8[kiBlkIdx] = (uint8_t) iMad; // at most 255: difference of two 8-bit pixels
        iMbSum    += iSum;
        iMbSqSum  += iSqSum;
        iMbSqDiff += iSqDiff;
      }

      pSum16x16[iMbIdx]    = iMbSum;
      psqsum16x16[iMbIdx]  = iMbSqSum;
      psqdiff16x16[iMbIdx] = iMbSqDiff;
      ++iMbIdx;
    }
  }
}
485
486
/*!
 * \brief   Gather the statistics of one 8x8 block for VAACalcSadBgd_c.
 *
 * \param   pCur      top-left pixel of the 8x8 block in the current picture
 * \param   pRef      top-left pixel of the co-located 8x8 block in the reference picture
 * \param   iStride   distance in bytes between two consecutive rows (same for both pictures)
 * \param   pSad      [out] sum of |cur - ref| over the block
 * \param   pSd       [out] signed sum of (cur - ref) over the block
 * \param   pMad      [out] largest |cur - ref| found in the block
 */
static inline void VAACalcSadBgdBlock8x8_c (const uint8_t* pCur, const uint8_t* pRef, int32_t iStride,
    int32_t* pSad, int32_t* pSd, int32_t* pMad) {
  int32_t iSad = 0;
  int32_t iSd = 0;
  int32_t iMad = 0;

  for (int32_t iRow = 0; iRow < 8; ++iRow) {
    for (int32_t iCol = 0; iCol < 8; ++iCol) {
      const int32_t kiDiff    = pCur[iCol] - pRef[iCol];
      const int32_t kiAbsDiff = kiDiff < 0 ? -kiDiff : kiDiff;

      iSd  += kiDiff;
      iSad += kiAbsDiff;
      if (kiAbsDiff > iMad) {
        iMad = kiAbsDiff;
      }
    }
    pCur += iStride;
    pRef += iStride;
  }

  *pSad = iSad;
  *pSd  = iSd;
  *pMad = iMad;
}

/*!
 * \brief   Per-8x8-block SAD, signed difference sum and maximum absolute difference between two
 *          pictures, plus the whole-frame SAD.
 *
 * The picture is walked in 16x16 macroblocks in raster order; only the (iPicWidth >> 4) x (iPicHeight >> 4)
 * complete macroblocks are visited. Each macroblock is split into four 8x8 blocks, ordered
 * top-left, top-right, bottom-left, bottom-right.
 *
 * \param   pCurData      current picture
 * \param   pRefData      reference picture
 * \param   iPicWidth     picture width in pixels
 * \param   iPicHeight    picture height in pixels
 * \param   iPicStride    row stride in bytes, shared by both pictures
 * \param   pFrameSad     [out] SAD summed over all visited 8x8 blocks
 * \param   pSad8x8       [out] four SAD entries per macroblock (index = mb * 4 + block)
 * \param   pSd8x8        [out] four entries per macroblock: signed sum of (cur - ref)
 * \param   pMad8x8       [out] four entries per macroblock: maximum |cur - ref|
 *
 * \note    The start of every macroblock row is derived from the picture origin, so a width that is not
 *          a multiple of 16 simply leaves the trailing columns unvisited instead of shifting the
 *          following macroblock rows.
 */
void VAACalcSadBgd_c (const uint8_t* pCurData, const uint8_t* pRefData, int32_t iPicWidth, int32_t iPicHeight,
                      int32_t iPicStride,
                      int32_t* pFrameSad, int32_t* pSad8x8, int32_t* pSd8x8, uint8_t* pMad8x8) {
  const int32_t kiMbWidth   = iPicWidth >> 4;
  const int32_t kiMbHeight  = iPicHeight >> 4;
  const int32_t kiStrideX8  = iPicStride * 8;
  const int32_t kiStrideX16 = iPicStride * 16;
  int32_t iMbIdx = 0;

  *pFrameSad = 0;
  for (int32_t iMbY = 0; iMbY < kiMbHeight; ++iMbY) {
    const uint8_t* pCurMbRow = pCurData + iMbY * kiStrideX16;
    const uint8_t* pRefMbRow = pRefData + iMbY * kiStrideX16;

    for (int32_t iMbX = 0; iMbX < kiMbWidth; ++iMbX) {
      const uint8_t* pCurMb = pCurMbRow + (iMbX << 4);
      const uint8_t* pRefMb = pRefMbRow + (iMbX << 4);

      for (int32_t iBlk = 0; iBlk < 4; ++iBlk) {
        // bit 1 of iBlk selects the lower half, bit 0 the right half of the macroblock
        const int32_t kiOffset = (iBlk >> 1) * kiStrideX8 + (iBlk & 1) * 8;
        const int32_t kiBlkIdx = (iMbIdx << 2) + iBlk;
        int32_t iSad, iSd, iMad;

        VAACalcSadBgdBlock8x8_c (pCurMb + kiOffset, pRefMb + kiOffset, iPicStride, &iSad, &iSd, &iMad);

        *pFrameSad += iSad;
        pSad8x8[kiBlkIdx] = iSad;
        pSd8x8[kiBlkIdx]  = iSd;
        pMad8x8[kiBlkIdx] = (uint8_t) iMad; // at most 255: difference of two 8-bit pixels
      }
      ++iMbIdx;
    }
  }
}
597
598
WELSVP_NAMESPACE_END