/src/mozilla-central/gfx/ycbcr/yuv_convert.cpp

Source (jump to first uncovered line)
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This webpage shows layout of YV12 and other YUV formats
// http://www.fourcc.org/yuv.php
// The actual conversion is best described here
// http://en.wikipedia.org/wiki/YUV
// An article on optimizing YUV conversion using tables instead of multiplies
// http://lestourtereaux.free.fr/papers/data/yuvrgb.pdf
//
// YV12 is a full plane of Y and a half height, half width chroma planes
// YV16 is a full plane of Y and a full height, half width chroma planes
// YV24 is a full plane of Y and a full height, full width chroma planes
//
// ARGB pixel format is output, which on little endian is stored as BGRA.
// The alpha is set to 255, allowing the application to use RGBA or RGB32.

#include "yuv_convert.h"

#include "gfxPrefs.h"
#include "libyuv.h"
#include "scale_yuv_argb.h"
// Header for low level row functions.
#include "yuv_row.h"
#include "mozilla/SSE.h"

namespace mozilla {

namespace gfx {

// Fixed-point helpers: scale factors in this file are stored as 16.16 values
// (16 integer bits, 16 fractional bits).
const int kFractionBits = 16;
// The value 1.0 expressed in 16.16 fixed point.
const int kFractionMax = 1 << kFractionBits;
// Mask that keeps only the fractional bits of a 16.16 value.
const int kFractionMask = kFractionMax - 1;

// Infers the chroma layout from the luma and chroma plane dimensions.
//   - chroma the same size as luma                      -> YV24
//   - chroma half width (rounded up), same height       -> YV16
//   - anything else                                     -> YV12
YUVType TypeFromSize(int ywidth,
                     int yheight,
                     int cbcrwidth,
                     int cbcrheight)
{
  const bool full_height_chroma = (yheight == cbcrheight);

  if (full_height_chroma && ywidth == cbcrwidth) {
    return YV24;
  }
  if (full_height_chroma && (ywidth + 1) / 2 == cbcrwidth) {
    return YV16;
  }
  return YV12;
}

// Maps this file's YUVType onto the matching libyuv FourCC code. Any value
// other than YV24/YV16/YV12 yields FOURCC_ANY.
libyuv::FourCC FourCCFromYUVType(YUVType aYUVType)
{
  switch (aYUVType) {
    case YV24:
      return libyuv::FOURCC_I444;
    case YV16:
      return libyuv::FOURCC_I422;
    case YV12:
      return libyuv::FOURCC_I420;
    default:
      return libyuv::FOURCC_ANY;
  }
}

// Convert a frame of YUV to 32 bit ARGB.
//
// Converts the pic_width x pic_height region whose top-left luma sample sits
// at (pic_x, pic_y) in the source planes and writes it starting at rgb_buf.
//
//   y_buf, u_buf, v_buf  source planes
//   rgb_buf              destination; output row r starts at r * rgb_pitch
//   pic_x, pic_y         top-left corner of the region, in luma samples
//   pic_width/height     size of the region, in luma samples
//   y_pitch, uv_pitch    byte stride of the luma / chroma planes
//   rgb_pitch            byte stride of the destination
//   yuv_type             chroma layout (YV24, YV16 or YV12)
//   yuv_color_space      BT709 selects the libyuv H4xx converters, anything
//                        else the I4xx converters
void ConvertYCbCrToRGB32(const uint8* y_buf,
                         const uint8* u_buf,
                         const uint8* v_buf,
                         uint8* rgb_buf,
                         int pic_x,
                         int pic_y,
                         int pic_width,
                         int pic_height,
                         int y_pitch,
                         int uv_pitch,
                         int rgb_pitch,
                         YUVType yuv_type,
                         YUVColorSpace yuv_color_space) {


  // Deprecated function's conversion is accurate.
  // libyuv conversion is a bit inaccurate to get performance. It dynamically
  // calculates RGB from YUV to use simd. In it, signed byte is used for conversion's
  // coefficient, but it requests 129. libyuv cut 129 to 127. And only 6 bits are
  // used for a decimal part during the dynamic calculation.
  //
  // The function is still fast on some old intel chips.
  // See Bug 1256475.
  bool use_deprecated = gfxPrefs::YCbCrAccurateConversion() ||
                        (supports_mmx() && supports_sse() && !supports_sse3() &&
                         yuv_color_space == YUVColorSpace::BT601);
  // The deprecated function only support BT601.
  // See Bug 1210357.
  if (yuv_color_space != YUVColorSpace::BT601) {
    use_deprecated = false;
  }
  if (use_deprecated) {
    ConvertYCbCrToRGB32_deprecated(y_buf, u_buf, v_buf, rgb_buf,
                                   pic_x, pic_y, pic_width, pic_height,
                                   y_pitch, uv_pitch, rgb_pitch, yuv_type);
    return;
  }

  if (yuv_type == YV24) {
    // Chroma planes have the same geometry as luma.
    const uint8* src_y = y_buf + y_pitch * pic_y + pic_x;
    const uint8* src_u = u_buf + uv_pitch * pic_y + pic_x;
    const uint8* src_v = v_buf + uv_pitch * pic_y + pic_x;
    if (yuv_color_space == mozilla::YUVColorSpace::BT709) {
      DebugOnly<int> err = libyuv::H444ToARGB(src_y, y_pitch,
                                              src_u, uv_pitch,
                                              src_v, uv_pitch,
                                              rgb_buf, rgb_pitch,
                                              pic_width, pic_height);
      MOZ_ASSERT(!err);
    } else {
      DebugOnly<int> err = libyuv::I444ToARGB(src_y, y_pitch,
                                              src_u, uv_pitch,
                                              src_v, uv_pitch,
                                              rgb_buf, rgb_pitch,
                                              pic_width, pic_height);
      MOZ_ASSERT(!err);
    }
  } else if (yuv_type == YV16) {
    // Chroma is half width, full height: only the column index is halved.
    const uint8* src_y = y_buf + y_pitch * pic_y + pic_x;
    const uint8* src_u = u_buf + uv_pitch * pic_y + pic_x / 2;
    const uint8* src_v = v_buf + uv_pitch * pic_y + pic_x / 2;
    if (yuv_color_space == mozilla::YUVColorSpace::BT709) {
      DebugOnly<int> err = libyuv::H422ToARGB(src_y, y_pitch,
                                              src_u, uv_pitch,
                                              src_v, uv_pitch,
                                              rgb_buf, rgb_pitch,
                                              pic_width, pic_height);
      MOZ_ASSERT(!err);
    } else {
      DebugOnly<int> err = libyuv::I422ToARGB(src_y, y_pitch,
                                              src_u, uv_pitch,
                                              src_v, uv_pitch,
                                              rgb_buf, rgb_pitch,
                                              pic_width, pic_height);
      MOZ_ASSERT(!err);
    }
  } else {
    MOZ_ASSERT(yuv_type == YV12);
    const uint8* src_y = y_buf + y_pitch * pic_y + pic_x;
    // Chroma is half width and half height, so the row and column indices
    // must be halved separately. Halving the combined byte offset
    // ((uv_pitch * pic_y + pic_x) / 2) only gives the right sample when pic_y
    // is even; for an odd pic_y it points into the middle of a chroma row.
    // This matches the addressing used by ConvertYCbCrToRGB32_deprecated.
    const uint8* src_u = u_buf + uv_pitch * (pic_y / 2) + pic_x / 2;
    const uint8* src_v = v_buf + uv_pitch * (pic_y / 2) + pic_x / 2;
    if (yuv_color_space == mozilla::YUVColorSpace::BT709) {
      DebugOnly<int> err = libyuv::H420ToARGB(src_y, y_pitch,
                                              src_u, uv_pitch,
                                              src_v, uv_pitch,
                                              rgb_buf, rgb_pitch,
                                              pic_width, pic_height);
      MOZ_ASSERT(!err);
    } else {
      DebugOnly<int> err = libyuv::I420ToARGB(src_y, y_pitch,
                                              src_u, uv_pitch,
                                              src_v, uv_pitch,
                                              rgb_buf, rgb_pitch,
                                              pic_width, pic_height);
      MOZ_ASSERT(!err);
    }
  }
}

// Convert a frame of YUV to 32 bit ARGB.
void ConvertYCbCrToRGB32_deprecated(const uint8* y_buf,
                                    const uint8* u_buf,
                                    const uint8* v_buf,
                                    uint8* rgb_buf,
                                    int pic_x,
                                    int pic_y,
                                    int pic_width,
                                    int pic_height,
                                    int y_pitch,
                                    int uv_pitch,
                                    int rgb_pitch,
                                    YUVType yuv_type) {
  unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
  unsigned int x_shift = yuv_type == YV24 ? 0 : 1;
  // Test for SSE because the optimized code uses movntq, which is not part of MMX.
  bool has_sse = supports_mmx() && supports_sse();
  // There is no optimized YV24 SSE routine so we check for this and
  // fall back to the C code.
  has_sse &= yuv_type != YV24;
  bool odd_pic_x = yuv_type != YV24 && pic_x % 2 != 0;
  int x_width = odd_pic_x ? pic_width - 1 : pic_width;

  for (int y = pic_y; y < pic_height + pic_y; ++y) {
    uint8* rgb_row = rgb_buf + (y - pic_y) * rgb_pitch;
    const uint8* y_ptr = y_buf + y * y_pitch + pic_x;
    const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
    const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);

    if (odd_pic_x) {
      // Handle the single odd pixel manually and use the
      // fast routines for the remaining.
      FastConvertYUVToRGB32Row_C(y_ptr++,
                                 u_ptr++,
                                 v_ptr++,
                                 rgb_row,
                                 1,
                                 x_shift);
      rgb_row += 4;
    }

    if (has_sse) {
      FastConvertYUVToRGB32Row(y_ptr,
                               u_ptr,
                               v_ptr,
                               rgb_row,
                               x_width);
    }
    else {
      FastConvertYUVToRGB32Row_C(y_ptr,
                                 u_ptr,
                                 v_ptr,
                                 rgb_row,
                                 x_width,
                                 x_shift);
    }
  }

  // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
  if (has_sse)
    EMMS();
}

// Portable vertical blend of two source rows into ybuf.
//
// Each output byte is (row0 * (256 - f) + row1 * f) >> 8 with
// f = source_y_fraction. Pixels are produced in groups of 8 to mimic the MMX
// code, and at least one group is always written, so the output (and both
// inputs) are touched up to the next multiple of 8 at or beyond source_width.
static void FilterRows_C(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
                         int source_width, int source_y_fraction) {
  const int kGroup = 8;
  const int weight1 = source_y_fraction;
  const int weight0 = 256 - weight1;
  uint8* const stop = ybuf + source_width;
  do {
    for (int i = 0; i < kGroup; ++i) {
      ybuf[i] = (y0_ptr[i] * weight0 + y1_ptr[i] * weight1) >> 8;
    }
    y0_ptr += kGroup;
    y1_ptr += kGroup;
    ybuf += kGroup;
  } while (ybuf < stop);
}

// Prototypes for the SIMD row-blend routines. They take the same arguments as
// FilterRows_C and are only declared here; each is declared only when the
// corresponding MOZILLA_MAY_SUPPORT_* macro is defined.
#ifdef MOZILLA_MAY_SUPPORT_MMX
void FilterRows_MMX(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
                    int source_width, int source_y_fraction);
#endif

#ifdef MOZILLA_MAY_SUPPORT_SSE2
void FilterRows_SSE2(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
                     int source_width, int source_y_fraction);
#endif

// Blends two source rows into ybuf, dispatching to the first available
// implementation in the order SSE2, MMX, plain C. A SIMD variant is considered
// only if it was compiled in and the CPU reports support at run time.
static inline void FilterRows(uint8* ybuf, const uint8* y0_ptr,
                              const uint8* y1_ptr, int source_width,
                              int source_y_fraction) {
#if defined(MOZILLA_MAY_SUPPORT_SSE2)
  if (mozilla::supports_sse2()) {
    return FilterRows_SSE2(ybuf, y0_ptr, y1_ptr, source_width,
                           source_y_fraction);
  }
#endif

#if defined(MOZILLA_MAY_SUPPORT_MMX)
  if (mozilla::supports_mmx()) {
    return FilterRows_MMX(ybuf, y0_ptr, y1_ptr, source_width,
                          source_y_fraction);
  }
#endif

  // No usable SIMD variant: use the portable implementation.
  FilterRows_C(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction);
}


// Scale a frame of YUV to 32 bit ARGB.
//
// Scales the source_width x source_height YUV frame to width x height ARGB
// pixels in rgb_buf. Depending on prefs, CPU features and colour space the
// work goes either to ScaleYCbCrToRGB32_deprecated or to
// libyuv::YUVToARGBScale. `filter` is forwarded to the deprecated scaler
// only; the libyuv path always requests kFilterBilinear.
void ScaleYCbCrToRGB32(const uint8* y_buf,
                       const uint8* u_buf,
                       const uint8* v_buf,
                       uint8* rgb_buf,
                       int source_width,
                       int source_height,
                       int width,
                       int height,
                       int y_pitch,
                       int uv_pitch,
                       int rgb_pitch,
                       YUVType yuv_type,
                       YUVColorSpace yuv_color_space,
                       ScaleFilter filter) {

  const bool prefer_deprecated = gfxPrefs::YCbCrAccurateConversion() ||
#if defined(XP_WIN) && defined(_M_X64)
                                 // libyuv does not support SIMD scaling on win 64bit. See Bug 1295927.
                                 supports_sse3() ||
#endif
                                 (supports_mmx() && supports_sse() && !supports_sse3());

  // The deprecated function only support BT601 (see Bug 1210357), so any
  // other colour space always takes the libyuv path.
  const bool is_bt601 = (yuv_color_space == YUVColorSpace::BT601);

  if (prefer_deprecated && is_bt601) {
    ScaleYCbCrToRGB32_deprecated(y_buf, u_buf, v_buf, rgb_buf,
                                 source_width, source_height,
                                 width, height,
                                 y_pitch, uv_pitch, rgb_pitch,
                                 yuv_type, ROTATE_0, filter);
    return;
  }

  DebugOnly<int> err =
    libyuv::YUVToARGBScale(y_buf, y_pitch,
                           u_buf, uv_pitch,
                           v_buf, uv_pitch,
                           FourCCFromYUVType(yuv_type),
                           yuv_color_space,
                           source_width, source_height,
                           rgb_buf, rgb_pitch,
                           width, height,
                           libyuv::kFilterBilinear);
  MOZ_ASSERT(!err);
}

// Scale a frame of YUV to 32 bit ARGB.
//
// Walks the destination one row at a time: for each output row it picks the
// source row with 16.16 fixed-point stepping, optionally blends it with the
// next source row into a stack scratch buffer (vertical filter), then hands
// the row to one of the horizontal convert/scale row routines.
//
//   y_buf, u_buf, v_buf   source planes
//   rgb_buf               destination; output row y starts at y * rgb_pitch
//   source_width/height   source frame size in luma samples
//   width, height         destination size in pixels; nothing is written if
//                         either is zero
//   y_pitch, uv_pitch     byte stride of the luma / chroma planes
//   rgb_pitch             byte stride of the destination
//   yuv_type              only YV12 vs. not-YV12 is distinguished here (it
//                         selects the vertical chroma shift)
//   view_rotate           rotation/mirroring; any non-zero value disables
//                         filtering
//   filter                bit set of FILTER_BILINEAR_H / FILTER_BILINEAR_V
void ScaleYCbCrToRGB32_deprecated(const uint8* y_buf,
                                  const uint8* u_buf,
                                  const uint8* v_buf,
                                  uint8* rgb_buf,
                                  int source_width,
                                  int source_height,
                                  int width,
                                  int height,
                                  int y_pitch,
                                  int uv_pitch,
                                  int rgb_pitch,
                                  YUVType yuv_type,
                                  Rotate view_rotate,
                                  ScaleFilter filter) {
  // Remembered so EMMS() can be issued once after all rows are done.
  bool has_mmx = supports_mmx();

  // 4096 allows 3 buffers to fit in 12k.
  // Helps performance on CPU with 16K L1 cache.
  // Large enough for 3830x2160 and 30" displays which are 2560x1600.
  const int kFilterBufferSize = 4096;
  // Disable filtering if the screen is too big (to avoid buffer overflows).
  // This should never happen to regular users: they don't have monitors
  // wider than 4096 pixels.
  // TODO(fbarchard): Allow rotated videos to filter.
  if (source_width > kFilterBufferSize || view_rotate)
    filter = FILTER_NONE;

  // Vertical chroma subsampling: YV12 chroma rows are at half the luma row
  // index, other layouts use the luma row index directly.
  unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
  // Diagram showing origin and direction of source sampling.
  // ->0   4<-
  // 7       3
  //
  // 6       5
  // ->1   2<-
  // Rotations that start at right side of image.
  // The plane pointers are moved to the last column and source_width is
  // negated so that the horizontal step computed below is negative.
  if ((view_rotate == ROTATE_180) ||
      (view_rotate == ROTATE_270) ||
      (view_rotate == MIRROR_ROTATE_0) ||
      (view_rotate == MIRROR_ROTATE_90)) {
    y_buf += source_width - 1;
    u_buf += source_width / 2 - 1;
    v_buf += source_width / 2 - 1;
    source_width = -source_width;
  }
  // Rotations that start at bottom of image.
  // Same idea vertically: start on the last row and negate source_height.
  if ((view_rotate == ROTATE_90) ||
      (view_rotate == ROTATE_180) ||
      (view_rotate == MIRROR_ROTATE_90) ||
      (view_rotate == MIRROR_ROTATE_180)) {
    y_buf += (source_height - 1) * y_pitch;
    u_buf += ((source_height >> y_shift) - 1) * uv_pitch;
    v_buf += ((source_height >> y_shift) - 1) * uv_pitch;
    source_height = -source_height;
  }

  // Handle zero sized destination.
  if (width == 0 || height == 0)
    return;
  // Source step per destination pixel/row in 16.16 fixed point.
  // NOTE(review): the products are computed in int; with a 32-bit int they
  // would overflow once |source_width| or |source_height| reaches 32768 --
  // confirm callers never pass frames that large.
  int source_dx = source_width * kFractionMax / width;
  int source_dy = source_height * kFractionMax / height;
  int source_dx_uv = source_dx;

  // For quarter-turn rotations the roles of the axes are swapped: walking
  // along an output row means stepping whole source rows (a pitch multiple),
  // and advancing to the next output row means stepping by 1 or -1 byte.
  if ((view_rotate == ROTATE_90) ||
      (view_rotate == ROTATE_270)) {
    int tmp = height;
    height = width;
    width = tmp;
    tmp = source_height;
    source_height = source_width;
    source_width = tmp;
    int original_dx = source_dx;
    int original_dy = source_dy;
    source_dx = ((original_dy >> kFractionBits) * y_pitch) << kFractionBits;
    source_dx_uv = ((original_dy >> kFractionBits) * uv_pitch) << kFractionBits;
    source_dy = original_dx;
    if (view_rotate == ROTATE_90) {
      y_pitch = -1;
      uv_pitch = -1;
      source_height = -source_height;
    } else {
      y_pitch = 1;
      uv_pitch = 1;
    }
  }

  // Need padding because FilterRows() will write 1 to 16 extra pixels
  // after the end for SSE2 version.
  // One stack array holds the three scratch rows (Y, U, V), each
  // kFilterBufferSize bytes; ybuf is the array start rounded up to a 16-byte
  // boundary.
  uint8 yuvbuf[16 + kFilterBufferSize * 3 + 16];
  uint8* ybuf =
      reinterpret_cast<uint8*>(reinterpret_cast<uintptr_t>(yuvbuf + 15) & ~15);
  uint8* ubuf = ybuf + kFilterBufferSize;
  uint8* vbuf = ubuf + kFilterBufferSize;
  // TODO(fbarchard): Fixed point math is off by 1 on negatives.
  int yscale_fixed = (source_height << kFractionBits) / height;

  // TODO(fbarchard): Split this into separate function for better efficiency.
  for (int y = 0; y < height; ++y) {
    uint8* dest_pixel = rgb_buf + y * rgb_pitch;
    int source_y_subpixel = (y * yscale_fixed);
    if (yscale_fixed >= (kFractionMax * 2)) {
      source_y_subpixel += kFractionMax / 2;  // For 1/2 or less, center filter.
    }
    // Integer part of the 16.16 position is the source row index.
    int source_y = source_y_subpixel >> kFractionBits;

    // Row pairs (current and next) for each plane; the "1" rows are only read
    // when vertical filtering blends two rows.
    const uint8* y0_ptr = y_buf + source_y * y_pitch;
    const uint8* y1_ptr = y0_ptr + y_pitch;

    const uint8* u0_ptr = u_buf + (source_y >> y_shift) * uv_pitch;
    const uint8* u1_ptr = u0_ptr + uv_pitch;
    const uint8* v0_ptr = v_buf + (source_y >> y_shift) * uv_pitch;
    const uint8* v1_ptr = v0_ptr + uv_pitch;

    // vertical scaler uses 16.8 fixed point
    // (keep the top 8 of the 16 fractional bits, giving a 0..255 blend weight)
    int source_y_fraction = (source_y_subpixel & kFractionMask) >> 8;
    int source_uv_fraction =
        ((source_y_subpixel >> y_shift) & kFractionMask) >> 8;

    const uint8* y_ptr = y0_ptr;
    const uint8* u_ptr = u0_ptr;
    const uint8* v_ptr = v0_ptr;
    // Apply vertical filtering if necessary.
    // TODO(fbarchard): Remove memcpy when not necessary.
    if (filter & mozilla::gfx::FILTER_BILINEAR_V) {
      // Blend only when actually scaling vertically, the position falls
      // between two rows, and a following source row exists; otherwise the
      // current row is copied into the scratch buffer unchanged.
      if (yscale_fixed != kFractionMax &&
          source_y_fraction && ((source_y + 1) < source_height)) {
        FilterRows(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction);
      } else {
        memcpy(ybuf, y0_ptr, source_width);
      }
      y_ptr = ybuf;
      // Duplicate the last sample one position past the end of the row
      // (presumably so a horizontal filter can read one sample beyond the
      // last pixel -- confirm against the row routines).
      ybuf[source_width] = ybuf[source_width-1];
      // NOTE(review): chroma width is taken as half the luma width here
      // regardless of yuv_type.
      int uv_source_width = (source_width + 1) / 2;
      if (yscale_fixed != kFractionMax &&
          source_uv_fraction &&
          (((source_y >> y_shift) + 1) < (source_height >> y_shift))) {
        FilterRows(ubuf, u0_ptr, u1_ptr, uv_source_width, source_uv_fraction);
        FilterRows(vbuf, v0_ptr, v1_ptr, uv_source_width, source_uv_fraction);
      } else {
        memcpy(ubuf, u0_ptr, uv_source_width);
        memcpy(vbuf, v0_ptr, uv_source_width);
      }
      u_ptr = ubuf;
      v_ptr = vbuf;
      ubuf[uv_source_width] = ubuf[uv_source_width - 1];
      vbuf[uv_source_width] = vbuf[uv_source_width - 1];
    }
    // Horizontal pass: choose the row routine from the horizontal step and
    // the requested filter.
    if (source_dx == kFractionMax) {  // Not scaled
      FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
                               dest_pixel, width);
    } else if (filter & FILTER_BILINEAR_H) {
        LinearScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
                                 dest_pixel, width, source_dx);
    } else {
// Specialized scalers and rotation.
// The SSE-specific variants below are only compiled for 32-bit x86 MSVC
// (non-clang) builds; every other build uses ScaleYUVToRGB32Row.
#if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_MSC_VER) && defined(_M_IX86) && !defined(__clang__)
      if(mozilla::supports_sse()) {
        if (width == (source_width * 2)) {
          DoubleYUVToRGB32Row_SSE(y_ptr, u_ptr, v_ptr,
                                  dest_pixel, width);
        } else if ((source_dx & kFractionMask) == 0) {
          // Scaling by integer scale factor. ie half.
          ConvertYUVToRGB32Row_SSE(y_ptr, u_ptr, v_ptr,
                                   dest_pixel, width,
                                   source_dx >> kFractionBits);
        } else if (source_dx_uv == source_dx) {  // Not rotated.
          ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
                             dest_pixel, width, source_dx);
        } else {
          RotateConvertYUVToRGB32Row_SSE(y_ptr, u_ptr, v_ptr,
                                         dest_pixel, width,
                                         source_dx >> kFractionBits,
                                         source_dx_uv >> kFractionBits);
        }
      }
      else {
        ScaleYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
                             dest_pixel, width, source_dx);
      }
#else
      // source_dx_uv is only consumed by the SSE rotate path above; the cast
      // marks it as deliberately unused in this configuration.
      (void)source_dx_uv;
      ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
                         dest_pixel, width, source_dx);
#endif
    }
  }
  // MMX used for FastConvertYUVToRGB32Row and FilterRows requires emms.
  if (has_mmx)
    EMMS();
}
// Converts a YUV frame plus a separate alpha plane to ARGB via
// libyuv::I420AlphaToARGB. The luma and alpha planes share one stride
// (ya_pitch); both chroma planes share uv_pitch.
void ConvertYCbCrAToARGB32(const uint8* y_buf,
                           const uint8* u_buf,
                           const uint8* v_buf,
                           const uint8* a_buf,
                           uint8* argb_buf,
                           int pic_width,
                           int pic_height,
                           int ya_pitch,
                           int uv_pitch,
                           int argb_pitch) {

  // The downstream graphics stack expects an attenuated input, so the
  // attenuation flag is switched on.
  const int kAttenuate = 1;

  DebugOnly<int> err = libyuv::I420AlphaToARGB(y_buf, ya_pitch,
                                               u_buf, uv_pitch,
                                               v_buf, uv_pitch,
                                               a_buf, ya_pitch,
                                               argb_buf, argb_pitch,
                                               pic_width, pic_height,
                                               kAttenuate);
  MOZ_ASSERT(!err);
}

} // namespace gfx
} // namespace mozilla

Coverage Report

Created: 2018-09-25 14:53

Line	Count	Source (jump to first uncovered line)
1		// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2		// Use of this source code is governed by a BSD-style license that can be
3		// found in the LICENSE file.
4
5		// This webpage shows layout of YV12 and other YUV formats
6		// http://www.fourcc.org/yuv.php
7		// The actual conversion is best described here
8		// http://en.wikipedia.org/wiki/YUV
9		// An article on optimizing YUV conversion using tables instead of multiplies
10		// http://lestourtereaux.free.fr/papers/data/yuvrgb.pdf
11		//
12		// YV12 is a full plane of Y and a half height, half width chroma planes
13		// YV16 is a full plane of Y and a full height, half width chroma planes
14		// YV24 is a full plane of Y and a full height, full width chroma planes
15		//
16		// ARGB pixel format is output, which on little endian is stored as BGRA.
17		// The alpha is set to 255, allowing the application to use RGBA or RGB32.
18
19		#include "yuv_convert.h"
20
21		#include "gfxPrefs.h"
22		#include "libyuv.h"
23		#include "scale_yuv_argb.h"
24		// Header for low level row functions.
25		#include "yuv_row.h"
26		#include "mozilla/SSE.h"
27
28		namespace mozilla {
29
30		namespace gfx {
31
32		// 16.16 fixed point arithmetic
33		const int kFractionBits = 16;
34		const int kFractionMax = 1 << kFractionBits;
35		const int kFractionMask = ((1 << kFractionBits) - 1);
36
37		YUVType TypeFromSize(int ywidth,
38		int yheight,
39		int cbcrwidth,
40		int cbcrheight)
41	0	{
42	0	if (ywidth == cbcrwidth && yheight == cbcrheight) {
43	0	return YV24;
44	0	}
45	0	else if ((ywidth + 1) / 2 == cbcrwidth && yheight == cbcrheight) {
46	0	return YV16;
47	0	}
48	0	else {
49	0	return YV12;
50	0	}
51	0	}
52
53		libyuv::FourCC FourCCFromYUVType(YUVType aYUVType)
54	0	{
55	0	if (aYUVType == YV24) {
56	0	return libyuv::FOURCC_I444;
57	0	} else if (aYUVType == YV16) {
58	0	return libyuv::FOURCC_I422;
59	0	} else if (aYUVType == YV12) {
60	0	return libyuv::FOURCC_I420;
61	0	} else {
62	0	return libyuv::FOURCC_ANY;
63	0	}
64	0	}
65
66		// Convert a frame of YUV to 32 bit ARGB.
67		void ConvertYCbCrToRGB32(const uint8* y_buf,
68		const uint8* u_buf,
69		const uint8* v_buf,
70		uint8* rgb_buf,
71		int pic_x,
72		int pic_y,
73		int pic_width,
74		int pic_height,
75		int y_pitch,
76		int uv_pitch,
77		int rgb_pitch,
78		YUVType yuv_type,
79	0	YUVColorSpace yuv_color_space) {
80	0
81	0
82	0	// Deprecated function's conversion is accurate.
83	0	// libyuv converion is a bit inaccurate to get performance. It dynamically
84	0	// calculates RGB from YUV to use simd. In it, signed byte is used for conversion's
85	0	// coefficient, but it requests 129. libyuv cut 129 to 127. And only 6 bits are
86	0	// used for a decimal part during the dynamic calculation.
87	0	//
88	0	// The function is still fast on some old intel chips.
89	0	// See Bug 1256475.
90	0	bool use_deprecated = gfxPrefs::YCbCrAccurateConversion() \|\|
91	0	(supports_mmx() && supports_sse() && !supports_sse3() &&
92	0	yuv_color_space == YUVColorSpace::BT601);
93	0	// The deprecated function only support BT601.
94	0	// See Bug 1210357.
95	0	if (yuv_color_space != YUVColorSpace::BT601) {
96	0	use_deprecated = false;
97	0	}
98	0	if (use_deprecated) {
99	0	ConvertYCbCrToRGB32_deprecated(y_buf, u_buf, v_buf, rgb_buf,
100	0	pic_x, pic_y, pic_width, pic_height,
101	0	y_pitch, uv_pitch, rgb_pitch, yuv_type);
102	0	return;
103	0	}
104	0
105	0	if (yuv_type == YV24) {
106	0	const uint8* src_y = y_buf + y_pitch * pic_y + pic_x;
107	0	const uint8* src_u = u_buf + uv_pitch * pic_y + pic_x;
108	0	const uint8* src_v = v_buf + uv_pitch * pic_y + pic_x;
109	0	if (yuv_color_space == mozilla::YUVColorSpace::BT709) {
110	0	DebugOnly<int> err = libyuv::H444ToARGB(src_y, y_pitch,
111	0	src_u, uv_pitch,
112	0	src_v, uv_pitch,
113	0	rgb_buf, rgb_pitch,
114	0	pic_width, pic_height);
115	0	MOZ_ASSERT(!err);
116	0	} else {
117	0	DebugOnly<int> err = libyuv::I444ToARGB(src_y, y_pitch,
118	0	src_u, uv_pitch,
119	0	src_v, uv_pitch,
120	0	rgb_buf, rgb_pitch,
121	0	pic_width, pic_height);
122	0	MOZ_ASSERT(!err);
123	0	}
124	0	} else if (yuv_type == YV16) {
125	0	const uint8* src_y = y_buf + y_pitch * pic_y + pic_x;
126	0	const uint8* src_u = u_buf + uv_pitch * pic_y + pic_x / 2;
127	0	const uint8* src_v = v_buf + uv_pitch * pic_y + pic_x / 2;
128	0	if (yuv_color_space == mozilla::YUVColorSpace::BT709) {
129	0	DebugOnly<int> err = libyuv::H422ToARGB(src_y, y_pitch,
130	0	src_u, uv_pitch,
131	0	src_v, uv_pitch,
132	0	rgb_buf, rgb_pitch,
133	0	pic_width, pic_height);
134	0	MOZ_ASSERT(!err);
135	0	} else {
136	0	DebugOnly<int> err = libyuv::I422ToARGB(src_y, y_pitch,
137	0	src_u, uv_pitch,
138	0	src_v, uv_pitch,
139	0	rgb_buf, rgb_pitch,
140	0	pic_width, pic_height);
141	0	MOZ_ASSERT(!err);
142	0	}
143	0	} else {
144	0	MOZ_ASSERT(yuv_type == YV12);
145	0	const uint8* src_y = y_buf + y_pitch * pic_y + pic_x;
146	0	const uint8* src_u = u_buf + (uv_pitch * pic_y + pic_x) / 2;
147	0	const uint8* src_v = v_buf + (uv_pitch * pic_y + pic_x) / 2;
148	0	if (yuv_color_space == mozilla::YUVColorSpace::BT709) {
149	0	DebugOnly<int> err = libyuv::H420ToARGB(src_y, y_pitch,
150	0	src_u, uv_pitch,
151	0	src_v, uv_pitch,
152	0	rgb_buf, rgb_pitch,
153	0	pic_width, pic_height);
154	0	MOZ_ASSERT(!err);
155	0	} else {
156	0	DebugOnly<int> err = libyuv::I420ToARGB(src_y, y_pitch,
157	0	src_u, uv_pitch,
158	0	src_v, uv_pitch,
159	0	rgb_buf, rgb_pitch,
160	0	pic_width, pic_height);
161	0	MOZ_ASSERT(!err);
162	0	}
163	0	}
164	0	}
165
166		// Convert a frame of YUV to 32 bit ARGB.
167		void ConvertYCbCrToRGB32_deprecated(const uint8* y_buf,
168		const uint8* u_buf,
169		const uint8* v_buf,
170		uint8* rgb_buf,
171		int pic_x,
172		int pic_y,
173		int pic_width,
174		int pic_height,
175		int y_pitch,
176		int uv_pitch,
177		int rgb_pitch,
178	0	YUVType yuv_type) {
179	0	unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
180	0	unsigned int x_shift = yuv_type == YV24 ? 0 : 1;
181	0	// Test for SSE because the optimized code uses movntq, which is not part of MMX.
182	0	bool has_sse = supports_mmx() && supports_sse();
183	0	// There is no optimized YV24 SSE routine so we check for this and
184	0	// fall back to the C code.
185	0	has_sse &= yuv_type != YV24;
186	0	bool odd_pic_x = yuv_type != YV24 && pic_x % 2 != 0;
187	0	int x_width = odd_pic_x ? pic_width - 1 : pic_width;
188	0
189	0	for (int y = pic_y; y < pic_height + pic_y; ++y) {
190	0	uint8* rgb_row = rgb_buf + (y - pic_y) * rgb_pitch;
191	0	const uint8* y_ptr = y_buf + y * y_pitch + pic_x;
192	0	const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
193	0	const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
194	0
195	0	if (odd_pic_x) {
196	0	// Handle the single odd pixel manually and use the
197	0	// fast routines for the remaining.
198	0	FastConvertYUVToRGB32Row_C(y_ptr++,
199	0	u_ptr++,
200	0	v_ptr++,
201	0	rgb_row,
202	0	1,
203	0	x_shift);
204	0	rgb_row += 4;
205	0	}
206	0
207	0	if (has_sse) {
208	0	FastConvertYUVToRGB32Row(y_ptr,
209	0	u_ptr,
210	0	v_ptr,
211	0	rgb_row,
212	0	x_width);
213	0	}
214	0	else {
215	0	FastConvertYUVToRGB32Row_C(y_ptr,
216	0	u_ptr,
217	0	v_ptr,
218	0	rgb_row,
219	0	x_width,
220	0	x_shift);
221	0	}
222	0	}
223	0
224	0	// MMX used for FastConvertYUVToRGB32Row requires emms instruction.
225	0	if (has_sse)
226	0	EMMS();
227	0	}
228
229		// C version does 8 at a time to mimic MMX code
230		static void FilterRows_C(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
231	0	int source_width, int source_y_fraction) {
232	0	int y1_fraction = source_y_fraction;
233	0	int y0_fraction = 256 - y1_fraction;
234	0	uint8* end = ybuf + source_width;
235	0	do {
236	0	ybuf[0] = (y0_ptr[0] * y0_fraction + y1_ptr[0] * y1_fraction) >> 8;
237	0	ybuf[1] = (y0_ptr[1] * y0_fraction + y1_ptr[1] * y1_fraction) >> 8;
238	0	ybuf[2] = (y0_ptr[2] * y0_fraction + y1_ptr[2] * y1_fraction) >> 8;
239	0	ybuf[3] = (y0_ptr[3] * y0_fraction + y1_ptr[3] * y1_fraction) >> 8;
240	0	ybuf[4] = (y0_ptr[4] * y0_fraction + y1_ptr[4] * y1_fraction) >> 8;
241	0	ybuf[5] = (y0_ptr[5] * y0_fraction + y1_ptr[5] * y1_fraction) >> 8;
242	0	ybuf[6] = (y0_ptr[6] * y0_fraction + y1_ptr[6] * y1_fraction) >> 8;
243	0	ybuf[7] = (y0_ptr[7] * y0_fraction + y1_ptr[7] * y1_fraction) >> 8;
244	0	y0_ptr += 8;
245	0	y1_ptr += 8;
246	0	ybuf += 8;
247	0	} while (ybuf < end);
248	0	}
249
250		#ifdef MOZILLA_MAY_SUPPORT_MMX
251		void FilterRows_MMX(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
252		int source_width, int source_y_fraction);
253		#endif
254
255		#ifdef MOZILLA_MAY_SUPPORT_SSE2
256		void FilterRows_SSE2(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
257		int source_width, int source_y_fraction);
258		#endif
259
260		static inline void FilterRows(uint8* ybuf, const uint8* y0_ptr,
261		const uint8* y1_ptr, int source_width,
262	0	int source_y_fraction) {
263	0	#ifdef MOZILLA_MAY_SUPPORT_SSE2
264	0	if (mozilla::supports_sse2()) {
265	0	FilterRows_SSE2(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction);
266	0	return;
267	0	}
268	0	#endif
269	0
270	0	#ifdef MOZILLA_MAY_SUPPORT_MMX
271	0	if (mozilla::supports_mmx()) {
272	0	FilterRows_MMX(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction);
273	0	return;
274	0	}
275	0	#endif
276	0
277	0	FilterRows_C(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction);
278	0	}
279
280
281		// Scale a frame of YUV to 32 bit ARGB.
282		void ScaleYCbCrToRGB32(const uint8* y_buf,
283		const uint8* u_buf,
284		const uint8* v_buf,
285		uint8* rgb_buf,
286		int source_width,
287		int source_height,
288		int width,
289		int height,
290		int y_pitch,
291		int uv_pitch,
292		int rgb_pitch,
293		YUVType yuv_type,
294		YUVColorSpace yuv_color_space,
295	0	ScaleFilter filter) {
296	0
297	0	bool use_deprecated = gfxPrefs::YCbCrAccurateConversion() \|\|
298		#if defined(XP_WIN) && defined(_M_X64)
299		// libyuv does not support SIMD scaling on win 64bit. See Bug 1295927.
300		supports_sse3() \|\|
301		#endif
302	0	(supports_mmx() && supports_sse() && !supports_sse3());
303	0	// The deprecated function only support BT601.
304	0	// See Bug 1210357.
305	0	if (yuv_color_space != YUVColorSpace::BT601) {
306	0	use_deprecated = false;
307	0	}
308	0	if (use_deprecated) {
309	0	ScaleYCbCrToRGB32_deprecated(y_buf, u_buf, v_buf,
310	0	rgb_buf,
311	0	source_width, source_height,
312	0	width, height,
313	0	y_pitch, uv_pitch,
314	0	rgb_pitch,
315	0	yuv_type,
316	0	ROTATE_0,
317	0	filter);
318	0	return;
319	0	}
320	0
321	0	DebugOnly<int> err =
322	0	libyuv::YUVToARGBScale(y_buf, y_pitch,
323	0	u_buf, uv_pitch,
324	0	v_buf, uv_pitch,
325	0	FourCCFromYUVType(yuv_type),
326	0	yuv_color_space,
327	0	source_width, source_height,
328	0	rgb_buf, rgb_pitch,
329	0	width, height,
330	0	libyuv::kFilterBilinear);
331	0	MOZ_ASSERT(!err);
332	0	return;
333	0	}
334
335		// Scale a frame of YUV to 32 bit ARGB.
336		void ScaleYCbCrToRGB32_deprecated(const uint8* y_buf,
337		const uint8* u_buf,
338		const uint8* v_buf,
339		uint8* rgb_buf,
340		int source_width,
341		int source_height,
342		int width,
343		int height,
344		int y_pitch,
345		int uv_pitch,
346		int rgb_pitch,
347		YUVType yuv_type,
348		Rotate view_rotate,
349	0	ScaleFilter filter) {
350	0	bool has_mmx = supports_mmx();
351	0
352	0	// 4096 allows 3 buffers to fit in 12k.
353	0	// Helps performance on CPU with 16K L1 cache.
354	0	// Large enough for 3830x2160 and 30" displays which are 2560x1600.
355	0	const int kFilterBufferSize = 4096;
356	0	// Disable filtering if the screen is too big (to avoid buffer overflows).
357	0	// This should never happen to regular users: they don't have monitors
358	0	// wider than 4096 pixels.
359	0	// TODO(fbarchard): Allow rotated videos to filter.
360	0	if (source_width > kFilterBufferSize \|\| view_rotate)
361	0	filter = FILTER_NONE;
362	0
363	0	unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
364	0	// Diagram showing origin and direction of source sampling.
365	0	// ->0 4<-
366	0	// 7 3
367	0	//
368	0	// 6 5
369	0	// ->1 2<-
370	0	// Rotations that start at right side of image.
371	0	if ((view_rotate == ROTATE_180) \|\|
372	0	(view_rotate == ROTATE_270) \|\|
373	0	(view_rotate == MIRROR_ROTATE_0) \|\|
374	0	(view_rotate == MIRROR_ROTATE_90)) {
375	0	y_buf += source_width - 1;
376	0	u_buf += source_width / 2 - 1;
377	0	v_buf += source_width / 2 - 1;
378	0	source_width = -source_width;
379	0	}
380	0	// Rotations that start at bottom of image.
381	0	if ((view_rotate == ROTATE_90) \|\|
382	0	(view_rotate == ROTATE_180) \|\|
383	0	(view_rotate == MIRROR_ROTATE_90) \|\|
384	0	(view_rotate == MIRROR_ROTATE_180)) {
385	0	y_buf += (source_height - 1) * y_pitch;
386	0	u_buf += ((source_height >> y_shift) - 1) * uv_pitch;
387	0	v_buf += ((source_height >> y_shift) - 1) * uv_pitch;
388	0	source_height = -source_height;
389	0	}
390	0
391	0	// Handle zero sized destination.
392	0	if (width == 0 \|\| height == 0)
393	0	return;
394	0	int source_dx = source_width * kFractionMax / width;
395	0	int source_dy = source_height * kFractionMax / height;
396	0	int source_dx_uv = source_dx;
397	0
398	0	if ((view_rotate == ROTATE_90) \|\|
399	0	(view_rotate == ROTATE_270)) {
400	0	int tmp = height;
401	0	height = width;
402	0	width = tmp;
403	0	tmp = source_height;
404	0	source_height = source_width;
405	0	source_width = tmp;
406	0	int original_dx = source_dx;
407	0	int original_dy = source_dy;
408	0	source_dx = ((original_dy >> kFractionBits) * y_pitch) << kFractionBits;
409	0	source_dx_uv = ((original_dy >> kFractionBits) * uv_pitch) << kFractionBits;
410	0	source_dy = original_dx;
411	0	if (view_rotate == ROTATE_90) {
412	0	y_pitch = -1;
413	0	uv_pitch = -1;
414	0	source_height = -source_height;
415	0	} else {
416	0	y_pitch = 1;
417	0	uv_pitch = 1;
418	0	}
419	0	}
420	0
421	0	// Need padding because FilterRows() will write 1 to 16 extra pixels
422	0	// after the end for SSE2 version.
423	0	uint8 yuvbuf[16 + kFilterBufferSize * 3 + 16];
424	0	uint8* ybuf =
425	0	reinterpret_cast<uint8*>(reinterpret_cast<uintptr_t>(yuvbuf + 15) & ~15);
426	0	uint8* ubuf = ybuf + kFilterBufferSize;
427	0	uint8* vbuf = ubuf + kFilterBufferSize;
428	0	// TODO(fbarchard): Fixed point math is off by 1 on negatives.
429	0	int yscale_fixed = (source_height << kFractionBits) / height;
430	0
431	0	// TODO(fbarchard): Split this into separate function for better efficiency.
432	0	for (int y = 0; y < height; ++y) {
433	0	uint8* dest_pixel = rgb_buf + y * rgb_pitch;
434	0	int source_y_subpixel = (y * yscale_fixed);
435	0	if (yscale_fixed >= (kFractionMax * 2)) {
436	0	source_y_subpixel += kFractionMax / 2; // For 1/2 or less, center filter.
437	0	}
438	0	int source_y = source_y_subpixel >> kFractionBits;
439	0
440	0	const uint8* y0_ptr = y_buf + source_y * y_pitch;
441	0	const uint8* y1_ptr = y0_ptr + y_pitch;
442	0
443	0	const uint8* u0_ptr = u_buf + (source_y >> y_shift) * uv_pitch;
444	0	const uint8* u1_ptr = u0_ptr + uv_pitch;
445	0	const uint8* v0_ptr = v_buf + (source_y >> y_shift) * uv_pitch;
446	0	const uint8* v1_ptr = v0_ptr + uv_pitch;
447	0
448	0	// vertical scaler uses 16.8 fixed point
449	0	int source_y_fraction = (source_y_subpixel & kFractionMask) >> 8;
450	0	int source_uv_fraction =
451	0	((source_y_subpixel >> y_shift) & kFractionMask) >> 8;
452	0
453	0	const uint8* y_ptr = y0_ptr;
454	0	const uint8* u_ptr = u0_ptr;
455	0	const uint8* v_ptr = v0_ptr;
456	0	// Apply vertical filtering if necessary.
457	0	// TODO(fbarchard): Remove memcpy when not necessary.
458	0	if (filter & mozilla::gfx::FILTER_BILINEAR_V) {
459	0	if (yscale_fixed != kFractionMax &&
460	0	source_y_fraction && ((source_y + 1) < source_height)) {
461	0	FilterRows(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction);
462	0	} else {
463	0	memcpy(ybuf, y0_ptr, source_width);
464	0	}
465	0	y_ptr = ybuf;
466	0	ybuf[source_width] = ybuf[source_width-1];
467	0	int uv_source_width = (source_width + 1) / 2;
468	0	if (yscale_fixed != kFractionMax &&
469	0	source_uv_fraction &&
470	0	(((source_y >> y_shift) + 1) < (source_height >> y_shift))) {
471	0	FilterRows(ubuf, u0_ptr, u1_ptr, uv_source_width, source_uv_fraction);
472	0	FilterRows(vbuf, v0_ptr, v1_ptr, uv_source_width, source_uv_fraction);
473	0	} else {
474	0	memcpy(ubuf, u0_ptr, uv_source_width);
475	0	memcpy(vbuf, v0_ptr, uv_source_width);
476	0	}
477	0	u_ptr = ubuf;
478	0	v_ptr = vbuf;
479	0	ubuf[uv_source_width] = ubuf[uv_source_width - 1];
480	0	vbuf[uv_source_width] = vbuf[uv_source_width - 1];
481	0	}
482	0	if (source_dx == kFractionMax) { // Not scaled
483	0	FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
484	0	dest_pixel, width);
485	0	} else if (filter & FILTER_BILINEAR_H) {
486	0	LinearScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
487	0	dest_pixel, width, source_dx);
488	0	} else {
489	0	// Specialized scalers and rotation.
490		#if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_MSC_VER) && defined(_M_IX86) && !defined(__clang__)
491		if(mozilla::supports_sse()) {
492		if (width == (source_width * 2)) {
493		DoubleYUVToRGB32Row_SSE(y_ptr, u_ptr, v_ptr,
494		dest_pixel, width);
495		} else if ((source_dx & kFractionMask) == 0) {
496		// Scaling by integer scale factor. ie half.
497		ConvertYUVToRGB32Row_SSE(y_ptr, u_ptr, v_ptr,
498		dest_pixel, width,
499		source_dx >> kFractionBits);
500		} else if (source_dx_uv == source_dx) { // Not rotated.
501		ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
502		dest_pixel, width, source_dx);
503		} else {
504		RotateConvertYUVToRGB32Row_SSE(y_ptr, u_ptr, v_ptr,
505		dest_pixel, width,
506		source_dx >> kFractionBits,
507		source_dx_uv >> kFractionBits);
508		}
509		}
510		else {
511		ScaleYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
512		dest_pixel, width, source_dx);
513		}
514		#else
515		(void)source_dx_uv;
516	0	ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
517	0	dest_pixel, width, source_dx);
518	0	#endif
519	0	}
520	0	}
521	0	// MMX used for FastConvertYUVToRGB32Row and FilterRows requires emms.
522	0	if (has_mmx)
523	0	EMMS();
524	0	}
525		void ConvertYCbCrAToARGB32(const uint8* y_buf,
526		const uint8* u_buf,
527		const uint8* v_buf,
528		const uint8* a_buf,
529		uint8* argb_buf,
530		int pic_width,
531		int pic_height,
532		int ya_pitch,
533		int uv_pitch,
534	0	int argb_pitch) {
535	0
536	0	// The downstream graphics stack expects an attenuated input, hence why the
537	0	// attenuation parameter is set.
538	0	DebugOnly<int> err = libyuv::I420AlphaToARGB(y_buf, ya_pitch,
539	0	u_buf, uv_pitch,
540	0	v_buf, uv_pitch,
541	0	a_buf, ya_pitch,
542	0	argb_buf, argb_pitch,
543	0	pic_width, pic_height, 1);
544	0	MOZ_ASSERT(!err);
545	0	}
546
547		} // namespace gfx
548		} // namespace mozilla