/src/openh264/codec/processing/src/vaacalc/vaacalcfuncs.cpp
/*!
 * \copy
 *     Copyright (c)  2013, Cisco Systems
 *     All rights reserved.
 *
 *     Redistribution and use in source and binary forms, with or without
 *     modification, are permitted provided that the following conditions
 *     are met:
 *
 *        * Redistributions of source code must retain the above copyright
 *          notice, this list of conditions and the following disclaimer.
 *
 *        * Redistributions in binary form must reproduce the above copyright
 *          notice, this list of conditions and the following disclaimer in
 *          the documentation and/or other materials provided with the
 *          distribution.
 *
 *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *     POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include "util.h"

WELSVP_NAMESPACE_BEGIN

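// Plain-C reference kernels for the vaacalc ("VAA calculation") statistics.
// Each routine walks the luma plane in 16x16 macroblocks, handling every
// macroblock as four 8x8 blocks; any right/bottom remainder narrower than 16
// pixels is ignored, since width and height are truncated to whole
// macroblocks. The "_c" suffix suggests these define the expected results for
// SIMD-optimized counterparts elsewhere in the project.

// Per 8x8 block: SAD against the reference frame (pSad8x8, four entries per
// macroblock). Per 16x16 macroblock: sum (pSum16x16) and squared sum
// (psqsum16x16) of current-frame pixels, and sum of squared differences
// (psqdiff16x16). *pFrameSad accumulates the SAD of the whole frame.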
void VAACalcSadSsd_c (const uint8_t* pCurData, const uint8_t* pRefData, int32_t iPicWidth, int32_t iPicHeight,
                      int32_t iPicStride,
                      int32_t* pFrameSad, int32_t* pSad8x8, int32_t* pSum16x16, int32_t* psqsum16x16, int32_t* psqdiff16x16) {
  const uint8_t* tmp_ref = pRefData;
  const uint8_t* tmp_cur = pCurData;
  int32_t iMbWidth = (iPicWidth >> 4);
  int32_t mb_height = (iPicHeight >> 4);
  int32_t mb_index = 0;
  int32_t pic_stride_x8 = iPicStride << 3;      // offset from a top 8x8 block to the one below it
  int32_t step = (iPicStride << 4) - iPicWidth; // from the end of one macroblock row to the start of the next

  *pFrameSad = 0;
  for (int32_t i = 0; i < mb_height; i ++) {
    for (int32_t j = 0; j < iMbWidth; j ++) {
      int32_t k, l;
      int32_t l_sad, l_sqdiff, l_sum, l_sqsum;
      const uint8_t* tmp_cur_row;
      const uint8_t* tmp_ref_row;

      pSum16x16[mb_index] = 0;
      psqsum16x16[mb_index] = 0;
      psqdiff16x16[mb_index] = 0;

      // Top-left 8x8 block of this macroblock.
      l_sad = l_sqdiff = l_sum = l_sqsum = 0;
      tmp_cur_row = tmp_cur;
      tmp_ref_row = tmp_ref;
      for (k = 0; k < 8; k ++) {
        for (l = 0; l < 8; l ++) {
          int32_t diff = WELS_ABS (tmp_cur_row[l] - tmp_ref_row[l]);
          l_sad += diff;
          l_sqdiff += diff * diff;
          l_sum += tmp_cur_row[l];
          l_sqsum += tmp_cur_row[l] * tmp_cur_row[l];
        }
        tmp_cur_row += iPicStride;
        tmp_ref_row += iPicStride;
      }
      *pFrameSad += l_sad;
      pSad8x8[ (mb_index << 2) + 0] = l_sad;
      pSum16x16[mb_index] += l_sum;
      psqsum16x16[mb_index] += l_sqsum;
      psqdiff16x16[mb_index] += l_sqdiff;

      // Top-right 8x8 block.
      l_sad = l_sqdiff = l_sum = l_sqsum = 0;
      tmp_cur_row = tmp_cur + 8;
      tmp_ref_row = tmp_ref + 8;
      for (k = 0; k < 8; k ++) {
        for (l = 0; l < 8; l ++) {
          int32_t diff = WELS_ABS (tmp_cur_row[l] - tmp_ref_row[l]);
          l_sad += diff;
          l_sqdiff += diff * diff;
          l_sum += tmp_cur_row[l];
          l_sqsum += tmp_cur_row[l] * tmp_cur_row[l];
        }
        tmp_cur_row += iPicStride;
        tmp_ref_row += iPicStride;
      }
      *pFrameSad += l_sad;
      pSad8x8[ (mb_index << 2) + 1] = l_sad;
      pSum16x16[mb_index] += l_sum;
      psqsum16x16[mb_index] += l_sqsum;
      psqdiff16x16[mb_index] += l_sqdiff;

      // Bottom-left 8x8 block.
      l_sad = l_sqdiff = l_sum = l_sqsum = 0;
      tmp_cur_row = tmp_cur + pic_stride_x8;
      tmp_ref_row = tmp_ref + pic_stride_x8;
      for (k = 0; k < 8; k ++) {
        for (l = 0; l < 8; l ++) {
          int32_t diff = WELS_ABS (tmp_cur_row[l] - tmp_ref_row[l]);
          l_sad += diff;
          l_sqdiff += diff * diff;
          l_sum += tmp_cur_row[l];
          l_sqsum += tmp_cur_row[l] * tmp_cur_row[l];
        }
        tmp_cur_row += iPicStride;
        tmp_ref_row += iPicStride;
      }
      *pFrameSad += l_sad;
      pSad8x8[ (mb_index << 2) + 2] = l_sad;
      pSum16x16[mb_index] += l_sum;
      psqsum16x16[mb_index] += l_sqsum;
      psqdiff16x16[mb_index] += l_sqdiff;

      // Bottom-right 8x8 block.
      l_sad = l_sqdiff = l_sum = l_sqsum = 0;
      tmp_cur_row = tmp_cur + pic_stride_x8 + 8;
      tmp_ref_row = tmp_ref + pic_stride_x8 + 8;
      for (k = 0; k < 8; k ++) {
        for (l = 0; l < 8; l ++) {
          int32_t diff = WELS_ABS (tmp_cur_row[l] - tmp_ref_row[l]);
          l_sad += diff;
          l_sqdiff += diff * diff;
          l_sum += tmp_cur_row[l];
          l_sqsum += tmp_cur_row[l] * tmp_cur_row[l];
        }
        tmp_cur_row += iPicStride;
        tmp_ref_row += iPicStride;
      }
      *pFrameSad += l_sad;
      pSad8x8[ (mb_index << 2) + 3] = l_sad;
      pSum16x16[mb_index] += l_sum;
      psqsum16x16[mb_index] += l_sqsum;
      psqdiff16x16[mb_index] += l_sqdiff;

      // Advance to the next macroblock in this row.
      tmp_ref += 16;
      tmp_cur += 16;
      ++mb_index;
    }
    // Skip down to the start of the next macroblock row.
    tmp_ref += step;
    tmp_cur += step;
  }
}
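
// Like VAACalcSadSsd_c but without the squared-difference output. The per-MB
// sum and squared sum let the caller derive the pixel variance, which is
// presumably the "Var" in the name.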
void VAACalcSadVar_c (const uint8_t* pCurData, const uint8_t* pRefData, int32_t iPicWidth, int32_t iPicHeight,
                      int32_t iPicStride,
                      int32_t* pFrameSad, int32_t* pSad8x8, int32_t* pSum16x16, int32_t* psqsum16x16) {
  const uint8_t* tmp_ref = pRefData;
  const uint8_t* tmp_cur = pCurData;
  int32_t iMbWidth = (iPicWidth >> 4);
  int32_t mb_height = (iPicHeight >> 4);
  int32_t mb_index = 0;
  int32_t pic_stride_x8 = iPicStride << 3;
  int32_t step = (iPicStride << 4) - iPicWidth;

  *pFrameSad = 0;
  for (int32_t i = 0; i < mb_height; i ++) {
    for (int32_t j = 0; j < iMbWidth; j ++) {
      int32_t k, l;
      int32_t l_sad, l_sum, l_sqsum;
      const uint8_t* tmp_cur_row;
      const uint8_t* tmp_ref_row;

      pSum16x16[mb_index] = 0;
      psqsum16x16[mb_index] = 0;

      // Top-left 8x8 block of this macroblock.
      l_sad = l_sum = l_sqsum = 0;
      tmp_cur_row = tmp_cur;
      tmp_ref_row = tmp_ref;
      for (k = 0; k < 8; k ++) {
        for (l = 0; l < 8; l ++) {
          int32_t diff = WELS_ABS (tmp_cur_row[l] - tmp_ref_row[l]);
          l_sad += diff;
          l_sum += tmp_cur_row[l];
          l_sqsum += tmp_cur_row[l] * tmp_cur_row[l];
        }
        tmp_cur_row += iPicStride;
        tmp_ref_row += iPicStride;
      }
      *pFrameSad += l_sad;
      pSad8x8[ (mb_index << 2) + 0] = l_sad;
      pSum16x16[mb_index] += l_sum;
      psqsum16x16[mb_index] += l_sqsum;

      // Top-right 8x8 block.
      l_sad = l_sum = l_sqsum = 0;
      tmp_cur_row = tmp_cur + 8;
      tmp_ref_row = tmp_ref + 8;
      for (k = 0; k < 8; k ++) {
        for (l = 0; l < 8; l ++) {
          int32_t diff = WELS_ABS (tmp_cur_row[l] - tmp_ref_row[l]);
          l_sad += diff;
          l_sum += tmp_cur_row[l];
          l_sqsum += tmp_cur_row[l] * tmp_cur_row[l];
        }
        tmp_cur_row += iPicStride;
        tmp_ref_row += iPicStride;
      }
      *pFrameSad += l_sad;
      pSad8x8[ (mb_index << 2) + 1] = l_sad;
      pSum16x16[mb_index] += l_sum;
      psqsum16x16[mb_index] += l_sqsum;

      // Bottom-left 8x8 block.
      l_sad = l_sum = l_sqsum = 0;
      tmp_cur_row = tmp_cur + pic_stride_x8;
      tmp_ref_row = tmp_ref + pic_stride_x8;
      for (k = 0; k < 8; k ++) {
        for (l = 0; l < 8; l ++) {
          int32_t diff = WELS_ABS (tmp_cur_row[l] - tmp_ref_row[l]);
          l_sad += diff;
          l_sum += tmp_cur_row[l];
          l_sqsum += tmp_cur_row[l] * tmp_cur_row[l];
        }
        tmp_cur_row += iPicStride;
        tmp_ref_row += iPicStride;
      }
      *pFrameSad += l_sad;
      pSad8x8[ (mb_index << 2) + 2] = l_sad;
      pSum16x16[mb_index] += l_sum;
      psqsum16x16[mb_index] += l_sqsum;

      // Bottom-right 8x8 block.
      l_sad = l_sum = l_sqsum = 0;
      tmp_cur_row = tmp_cur + pic_stride_x8 + 8;
      tmp_ref_row = tmp_ref + pic_stride_x8 + 8;
      for (k = 0; k < 8; k ++) {
        for (l = 0; l < 8; l ++) {
          int32_t diff = WELS_ABS (tmp_cur_row[l] - tmp_ref_row[l]);
          l_sad += diff;
          l_sum += tmp_cur_row[l];
          l_sqsum += tmp_cur_row[l] * tmp_cur_row[l];
        }
        tmp_cur_row += iPicStride;
        tmp_ref_row += iPicStride;
      }
      *pFrameSad += l_sad;
      pSad8x8[ (mb_index << 2) + 3] = l_sad;
      pSum16x16[mb_index] += l_sum;
      psqsum16x16[mb_index] += l_sqsum;

      // Advance to the next macroblock in this row.
      tmp_ref += 16;
      tmp_cur += 16;
      ++mb_index;
    }
    // Skip down to the start of the next macroblock row.
    tmp_ref += step;
    tmp_cur += step;
  }
}

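// SAD-only variant: writes the four per-8x8 SADs of each macroblock and the
// whole-frame SAD.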
void VAACalcSad_c (const uint8_t* pCurData, const uint8_t* pRefData, int32_t iPicWidth, int32_t iPicHeight,
                   int32_t iPicStride,
                   int32_t* pFrameSad, int32_t* pSad8x8) {
  const uint8_t* tmp_ref = pRefData;
  const uint8_t* tmp_cur = pCurData;
  int32_t iMbWidth = (iPicWidth >> 4);
  int32_t mb_height = (iPicHeight >> 4);
  int32_t mb_index = 0;
  int32_t pic_stride_x8 = iPicStride << 3;
  int32_t step = (iPicStride << 4) - iPicWidth;

  *pFrameSad = 0;
  for (int32_t i = 0; i < mb_height; i ++) {
    for (int32_t j = 0; j < iMbWidth; j ++) {
      int32_t k, l;
      int32_t l_sad;
      const uint8_t* tmp_cur_row;
      const uint8_t* tmp_ref_row;

      // Top-left 8x8 block of this macroblock.
      l_sad = 0;
      tmp_cur_row = tmp_cur;
      tmp_ref_row = tmp_ref;
      for (k = 0; k < 8; k ++) {
        for (l = 0; l < 8; l ++) {
          int32_t diff = WELS_ABS (tmp_cur_row[l] - tmp_ref_row[l]);
          l_sad += diff;
        }
        tmp_cur_row += iPicStride;
        tmp_ref_row += iPicStride;
      }
      *pFrameSad += l_sad;
      pSad8x8[ (mb_index << 2) + 0] = l_sad;

      // Top-right 8x8 block.
      l_sad = 0;
      tmp_cur_row = tmp_cur + 8;
      tmp_ref_row = tmp_ref + 8;
      for (k = 0; k < 8; k ++) {
        for (l = 0; l < 8; l ++) {
          int32_t diff = WELS_ABS (tmp_cur_row[l] - tmp_ref_row[l]);
          l_sad += diff;
        }
        tmp_cur_row += iPicStride;
        tmp_ref_row += iPicStride;
      }
      *pFrameSad += l_sad;
      pSad8x8[ (mb_index << 2) + 1] = l_sad;

      // Bottom-left 8x8 block.
      l_sad = 0;
      tmp_cur_row = tmp_cur + pic_stride_x8;
      tmp_ref_row = tmp_ref + pic_stride_x8;
      for (k = 0; k < 8; k ++) {
        for (l = 0; l < 8; l ++) {
          int32_t diff = WELS_ABS (tmp_cur_row[l] - tmp_ref_row[l]);
          l_sad += diff;
        }
        tmp_cur_row += iPicStride;
        tmp_ref_row += iPicStride;
      }
      *pFrameSad += l_sad;
      pSad8x8[ (mb_index << 2) + 2] = l_sad;

      // Bottom-right 8x8 block.
      l_sad = 0;
      tmp_cur_row = tmp_cur + pic_stride_x8 + 8;
      tmp_ref_row = tmp_ref + pic_stride_x8 + 8;
      for (k = 0; k < 8; k ++) {
        for (l = 0; l < 8; l ++) {
          int32_t diff = WELS_ABS (tmp_cur_row[l] - tmp_ref_row[l]);
          l_sad += diff;
        }
        tmp_cur_row += iPicStride;
        tmp_ref_row += iPicStride;
      }
      *pFrameSad += l_sad;
      pSad8x8[ (mb_index << 2) + 3] = l_sad;

      // Advance to the next macroblock in this row.
      tmp_ref += 16;
      tmp_cur += 16;
      ++mb_index;
    }
    // Skip down to the start of the next macroblock row.
    tmp_ref += step;
    tmp_cur += step;
  }
}

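// Superset variant: everything VAACalcSadSsd_c produces, plus the signed sum
// of differences per 8x8 block (pSd8x8) and the maximum absolute pixel
// difference per 8x8 block (pMad8x8); "Bgd" presumably refers to the
// background detection that consumes these.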
void VAACalcSadSsdBgd_c (const uint8_t* pCurData, const uint8_t* pRefData, int32_t iPicWidth, int32_t iPicHeight,
                         int32_t iPicStride,
                         int32_t* pFrameSad, int32_t* pSad8x8, int32_t* pSum16x16, int32_t* psqsum16x16, int32_t* psqdiff16x16, int32_t* pSd8x8,
                         uint8_t* pMad8x8) {
  const uint8_t* tmp_ref = pRefData;
  const uint8_t* tmp_cur = pCurData;
  int32_t iMbWidth = (iPicWidth >> 4);
  int32_t mb_height = (iPicHeight >> 4);
  int32_t mb_index = 0;
  int32_t pic_stride_x8 = iPicStride << 3;
  int32_t step = (iPicStride << 4) - iPicWidth;

  *pFrameSad = 0;
  for (int32_t i = 0; i < mb_height; i ++) {
    for (int32_t j = 0; j < iMbWidth; j ++) {
      int32_t k, l;
      int32_t l_sad, l_sqdiff, l_sum, l_sqsum, l_sd, l_mad;
      const uint8_t* tmp_cur_row;
      const uint8_t* tmp_ref_row;

      pSum16x16[mb_index] = 0;
      psqsum16x16[mb_index] = 0;
      psqdiff16x16[mb_index] = 0;

      // Top-left 8x8 block of this macroblock.
      l_sd = l_mad = l_sad = l_sqdiff = l_sum = l_sqsum = 0;
      tmp_cur_row = tmp_cur;
      tmp_ref_row = tmp_ref;
      for (k = 0; k < 8; k ++) {
        for (l = 0; l < 8; l ++) {
          int32_t diff = tmp_cur_row[l] - tmp_ref_row[l];
          int32_t abs_diff = WELS_ABS (diff);

          l_sd += diff;
          if (abs_diff > l_mad) {
            l_mad = abs_diff;
          }
          l_sad += abs_diff;
          l_sqdiff += abs_diff * abs_diff;
          l_sum += tmp_cur_row[l];
          l_sqsum += tmp_cur_row[l] * tmp_cur_row[l];
        }
        tmp_cur_row += iPicStride;
        tmp_ref_row += iPicStride;
      }
      *pFrameSad += l_sad;
      pSad8x8[ (mb_index << 2) + 0] = l_sad;
      pSum16x16[mb_index] += l_sum;
      psqsum16x16[mb_index] += l_sqsum;
      psqdiff16x16[mb_index] += l_sqdiff;
      pSd8x8[ (mb_index << 2) + 0] = l_sd;
      pMad8x8[ (mb_index << 2) + 0] = l_mad;

      // Top-right 8x8 block.
      l_sd = l_mad = l_sad = l_sqdiff = l_sum = l_sqsum = 0;
      tmp_cur_row = tmp_cur + 8;
      tmp_ref_row = tmp_ref + 8;
      for (k = 0; k < 8; k ++) {
        for (l = 0; l < 8; l ++) {
          int32_t diff = tmp_cur_row[l] - tmp_ref_row[l];
          int32_t abs_diff = WELS_ABS (diff);

          l_sd += diff;
          if (abs_diff > l_mad) {
            l_mad = abs_diff;
          }
          l_sad += abs_diff;
          l_sqdiff += abs_diff * abs_diff;
          l_sum += tmp_cur_row[l];
          l_sqsum += tmp_cur_row[l] * tmp_cur_row[l];
        }
        tmp_cur_row += iPicStride;
        tmp_ref_row += iPicStride;
      }
      *pFrameSad += l_sad;
      pSad8x8[ (mb_index << 2) + 1] = l_sad;
      pSum16x16[mb_index] += l_sum;
      psqsum16x16[mb_index] += l_sqsum;
      psqdiff16x16[mb_index] += l_sqdiff;
      pSd8x8[ (mb_index << 2) + 1] = l_sd;
      pMad8x8[ (mb_index << 2) + 1] = l_mad;

      // Bottom-left 8x8 block.
      l_sd = l_mad = l_sad = l_sqdiff = l_sum = l_sqsum = 0;
      tmp_cur_row = tmp_cur + pic_stride_x8;
      tmp_ref_row = tmp_ref + pic_stride_x8;
      for (k = 0; k < 8; k ++) {
        for (l = 0; l < 8; l ++) {
          int32_t diff = tmp_cur_row[l] - tmp_ref_row[l];
          int32_t abs_diff = WELS_ABS (diff);

          l_sd += diff;
          if (abs_diff > l_mad) {
            l_mad = abs_diff;
          }
          l_sad += abs_diff;
          l_sqdiff += abs_diff * abs_diff;
          l_sum += tmp_cur_row[l];
          l_sqsum += tmp_cur_row[l] * tmp_cur_row[l];
        }
        tmp_cur_row += iPicStride;
        tmp_ref_row += iPicStride;
      }
      *pFrameSad += l_sad;
      pSad8x8[ (mb_index << 2) + 2] = l_sad;
      pSum16x16[mb_index] += l_sum;
      psqsum16x16[mb_index] += l_sqsum;
      psqdiff16x16[mb_index] += l_sqdiff;
      pSd8x8[ (mb_index << 2) + 2] = l_sd;
      pMad8x8[ (mb_index << 2) + 2] = l_mad;

      // Bottom-right 8x8 block.
      l_sd = l_mad = l_sad = l_sqdiff = l_sum = l_sqsum = 0;
      tmp_cur_row = tmp_cur + pic_stride_x8 + 8;
      tmp_ref_row = tmp_ref + pic_stride_x8 + 8;
      for (k = 0; k < 8; k ++) {
        for (l = 0; l < 8; l ++) {
          int32_t diff = tmp_cur_row[l] - tmp_ref_row[l];
          int32_t abs_diff = WELS_ABS (diff);

          l_sd += diff;
          if (abs_diff > l_mad) {
            l_mad = abs_diff;
          }
          l_sad += abs_diff;
          l_sqdiff += abs_diff * abs_diff;
          l_sum += tmp_cur_row[l];
          l_sqsum += tmp_cur_row[l] * tmp_cur_row[l];
        }
        tmp_cur_row += iPicStride;
        tmp_ref_row += iPicStride;
      }
      *pFrameSad += l_sad;
      pSad8x8[ (mb_index << 2) + 3] = l_sad;
      pSum16x16[mb_index] += l_sum;
      psqsum16x16[mb_index] += l_sqsum;
      psqdiff16x16[mb_index] += l_sqdiff;
      pSd8x8[ (mb_index << 2) + 3] = l_sd;
      pMad8x8[ (mb_index << 2) + 3] = l_mad;

      // Advance to the next macroblock in this row.
      tmp_ref += 16;
      tmp_cur += 16;
      ++mb_index;
    }
    // Skip down to the start of the next macroblock row.
    tmp_ref += step;
    tmp_cur += step;
  }
}

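// Background-detection variant without the sum/square outputs: per-8x8 SAD,
// signed difference sum (pSd8x8), and maximum absolute difference (pMad8x8).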
void VAACalcSadBgd_c (const uint8_t* pCurData, const uint8_t* pRefData, int32_t iPicWidth, int32_t iPicHeight,
                      int32_t iPicStride,
                      int32_t* pFrameSad, int32_t* pSad8x8, int32_t* pSd8x8, uint8_t* pMad8x8) {
  const uint8_t* tmp_ref = pRefData;
  const uint8_t* tmp_cur = pCurData;
  int32_t iMbWidth = (iPicWidth >> 4);
  int32_t mb_height = (iPicHeight >> 4);
  int32_t mb_index = 0;
  int32_t pic_stride_x8 = iPicStride << 3;
  int32_t step = (iPicStride << 4) - iPicWidth;

  *pFrameSad = 0;
  for (int32_t i = 0; i < mb_height; i ++) {
    for (int32_t j = 0; j < iMbWidth; j ++) {
      int32_t k, l;
      int32_t l_sad, l_sd, l_mad;
      const uint8_t* tmp_cur_row;
      const uint8_t* tmp_ref_row;

      // Top-left 8x8 block of this macroblock.
      l_mad = l_sd = l_sad = 0;
      tmp_cur_row = tmp_cur;
      tmp_ref_row = tmp_ref;
      for (k = 0; k < 8; k ++) {
        for (l = 0; l < 8; l ++) {
          int32_t diff = tmp_cur_row[l] - tmp_ref_row[l];
          int32_t abs_diff = WELS_ABS (diff);
          l_sd += diff;
          l_sad += abs_diff;
          if (abs_diff > l_mad) {
            l_mad = abs_diff;
          }
        }
        tmp_cur_row += iPicStride;
        tmp_ref_row += iPicStride;
      }
      *pFrameSad += l_sad;
      pSad8x8[ (mb_index << 2) + 0] = l_sad;
      pSd8x8[ (mb_index << 2) + 0] = l_sd;
      pMad8x8[ (mb_index << 2) + 0] = l_mad;

      // Top-right 8x8 block.
      l_mad = l_sd = l_sad = 0;
      tmp_cur_row = tmp_cur + 8;
      tmp_ref_row = tmp_ref + 8;
      for (k = 0; k < 8; k ++) {
        for (l = 0; l < 8; l ++) {
          int32_t diff = tmp_cur_row[l] - tmp_ref_row[l];
          int32_t abs_diff = WELS_ABS (diff);
          l_sd += diff;
          l_sad += abs_diff;
          if (abs_diff > l_mad) {
            l_mad = abs_diff;
          }
        }
        tmp_cur_row += iPicStride;
        tmp_ref_row += iPicStride;
      }
      *pFrameSad += l_sad;
      pSad8x8[ (mb_index << 2) + 1] = l_sad;
      pSd8x8[ (mb_index << 2) + 1] = l_sd;
      pMad8x8[ (mb_index << 2) + 1] = l_mad;

      // Bottom-left 8x8 block.
      l_mad = l_sd = l_sad = 0;
      tmp_cur_row = tmp_cur + pic_stride_x8;
      tmp_ref_row = tmp_ref + pic_stride_x8;
      for (k = 0; k < 8; k ++) {
        for (l = 0; l < 8; l ++) {
          int32_t diff = tmp_cur_row[l] - tmp_ref_row[l];
          int32_t abs_diff = WELS_ABS (diff);
          l_sd += diff;
          l_sad += abs_diff;
          if (abs_diff > l_mad) {
            l_mad = abs_diff;
          }
        }
        tmp_cur_row += iPicStride;
        tmp_ref_row += iPicStride;
      }
      *pFrameSad += l_sad;
      pSad8x8[ (mb_index << 2) + 2] = l_sad;
      pSd8x8[ (mb_index << 2) + 2] = l_sd;
      pMad8x8[ (mb_index << 2) + 2] = l_mad;

      // Bottom-right 8x8 block.
      l_mad = l_sd = l_sad = 0;
      tmp_cur_row = tmp_cur + pic_stride_x8 + 8;
      tmp_ref_row = tmp_ref + pic_stride_x8 + 8;
      for (k = 0; k < 8; k ++) {
        for (l = 0; l < 8; l ++) {
          int32_t diff = tmp_cur_row[l] - tmp_ref_row[l];
          int32_t abs_diff = WELS_ABS (diff);
          l_sd += diff;
          l_sad += abs_diff;
          if (abs_diff > l_mad) {
            l_mad = abs_diff;
          }
        }
        tmp_cur_row += iPicStride;
        tmp_ref_row += iPicStride;
      }
      *pFrameSad += l_sad;
      pSad8x8[ (mb_index << 2) + 3] = l_sad;
      pSd8x8[ (mb_index << 2) + 3] = l_sd;
      pMad8x8[ (mb_index << 2) + 3] = l_mad;

      // Advance to the next macroblock in this row.
      tmp_ref += 16;
      tmp_cur += 16;
      ++mb_index;
    }
    // Skip down to the start of the next macroblock row.
    tmp_ref += step;
    tmp_cur += step;
  }
}
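
// A minimal usage sketch (compiled out, illustrative only): sizing the output
// buffers for a 16x16-aligned frame and calling the SAD-only kernel. The
// dimensions and the ExampleVAACalcSad name are hypothetical, not part of the
// library.
#if 0
static void ExampleVAACalcSad (const uint8_t* pCur, const uint8_t* pRef) {
  const int32_t kiWidth  = 320;                  // must be a multiple of 16
  const int32_t kiHeight = 192;                  // must be a multiple of 16
  const int32_t kiStride = kiWidth;              // no row padding in this sketch
  const int32_t kiMbNum  = (kiWidth >> 4) * (kiHeight >> 4);

  int32_t iFrameSad = 0;
  int32_t* pSad8x8  = new int32_t[kiMbNum << 2]; // four 8x8 blocks per macroblock

  VAACalcSad_c (pCur, pRef, kiWidth, kiHeight, kiStride, &iFrameSad, pSad8x8);

  delete[] pSad8x8;
}
#endif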

WELSVP_NAMESPACE_END