/src/libde265/libde265/acceleration.h

Source (jump to first uncovered line)
/*
 * H.265 video codec.
 * Copyright (c) 2013-2014 struktur AG, Dirk Farin <farin@struktur.de>
 *
 * This file is part of libde265.
 *
 * libde265 is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 3 of
 * the License, or (at your option) any later version.
 *
 * libde265 is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with libde265.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef DE265_ACCELERATION_H
#define DE265_ACCELERATION_H

#include <stddef.h>
#include <stdint.h>
#include <assert.h>


// Table of function pointers for pixel kernels, together with declarations
// of thin wrappers that pick the 8-bit or 16-bit entry.
//
// Naming convention visible in this header:
//   - "*_8"  entries take uint8_t pixel buffers,
//   - "*_16" entries take uint16_t pixel buffers and (in most cases) an
//     extra bit_depth argument,
//   - the un-suffixed member functions are defined inline further down in
//     this header and forward to one of the two, either by testing
//     `bit_depth <= 8` or via a pixel_t template specialization.
//
// The struct declares no constructor and no member initializers, so each
// pointer has to be assigned before the corresponding wrapper is called.
struct acceleration_functions
{
  // --- prediction output, 8-bit destination ---
  // All of these read int16_t source samples and write to a uint8_t buffer.

  void (*put_weighted_pred_avg_8)(uint8_t *_dst, ptrdiff_t dststride,
                                  const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride,
                                  int width, int height);

  void (*put_unweighted_pred_8)(uint8_t *_dst, ptrdiff_t dststride,
                                const int16_t *src, ptrdiff_t srcstride,
                                int width, int height);

  void (*put_weighted_pred_8)(uint8_t *_dst, ptrdiff_t dststride,
                              const int16_t *src, ptrdiff_t srcstride,
                              int width, int height,
                              int w,int o,int log2WD);
  void (*put_weighted_bipred_8)(uint8_t *_dst, ptrdiff_t dststride,
                                const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride,
                                int width, int height,
                                int w1,int o1, int w2,int o2, int log2WD);


  // --- prediction output, 16-bit destination ---
  // Same parameter lists as the 8-bit entries above, with a uint16_t
  // destination and a trailing bit_depth.

  void (*put_weighted_pred_avg_16)(uint16_t *_dst, ptrdiff_t dststride,
                                  const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride,
                                   int width, int height, int bit_depth);

  void (*put_unweighted_pred_16)(uint16_t *_dst, ptrdiff_t dststride,
                                const int16_t *src, ptrdiff_t srcstride,
                                int width, int height, int bit_depth);

  void (*put_weighted_pred_16)(uint16_t *_dst, ptrdiff_t dststride,
                              const int16_t *src, ptrdiff_t srcstride,
                              int width, int height,
                              int w,int o,int log2WD, int bit_depth);
  void (*put_weighted_bipred_16)(uint16_t *_dst, ptrdiff_t dststride,
                                const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride,
                                int width, int height,
                                int w1,int o1, int w2,int o2, int log2WD, int bit_depth);


  // --- prediction output, bit-depth dispatching wrappers ---
  // Defined inline below the struct: `bit_depth <= 8` selects the *_8
  // pointer (and _dst is cast to uint8_t*), otherwise the *_16 pointer is
  // used (and _dst is cast to uint16_t*).

  void put_weighted_pred_avg(void *_dst, ptrdiff_t dststride,
                             const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride,
                             int width, int height, int bit_depth) const;

  void put_unweighted_pred(void *_dst, ptrdiff_t dststride,
                           const int16_t *src, ptrdiff_t srcstride,
                           int width, int height, int bit_depth) const;

  void put_weighted_pred(void *_dst, ptrdiff_t dststride,
                         const int16_t *src, ptrdiff_t srcstride,
                         int width, int height,
                         int w,int o,int log2WD, int bit_depth) const;
  void put_weighted_bipred(void *_dst, ptrdiff_t dststride,
                           const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride,
                           int width, int height,
                           int w1,int o1, int w2,int o2, int log2WD, int bit_depth) const;




  // --- epel / qpel kernels, 8-bit source ---
  // These read uint8_t pixels and write int16_t samples.
  // Note that put_hevc_epel_8 has no bit_depth parameter, while the
  // _h/_v/_hv 8-bit variants do.
  // NOTE(review): epel/qpel presumably denote the chroma/luma sub-sample
  // interpolation filters of HEVC - confirm against the implementations.

  void (*put_hevc_epel_8)(int16_t *dst, ptrdiff_t dststride,
                          const uint8_t *src, ptrdiff_t srcstride, int width, int height,
                          int mx, int my, int16_t* mcbuffer);
  void (*put_hevc_epel_h_8)(int16_t *dst, ptrdiff_t dststride,
                            const uint8_t *src, ptrdiff_t srcstride, int width, int height,
                            int mx, int my, int16_t* mcbuffer, int bit_depth);
  void (*put_hevc_epel_v_8)(int16_t *dst, ptrdiff_t dststride,
                            const uint8_t *src, ptrdiff_t srcstride, int width, int height,
                            int mx, int my, int16_t* mcbuffer, int bit_depth);
  void (*put_hevc_epel_hv_8)(int16_t *dst, ptrdiff_t dststride,
                             const uint8_t *src, ptrdiff_t srcstride, int width, int height,
                             int mx, int my, int16_t* mcbuffer, int bit_depth);

  // 4x4 table; the put_hevc_qpel() wrapper indexes it as [dX][dY]
  void (*put_hevc_qpel_8[4][4])(int16_t *dst, ptrdiff_t dststride,
                                const uint8_t *src, ptrdiff_t srcstride, int width, int height,
                                int16_t* mcbuffer);


  // --- epel / qpel kernels, 16-bit source ---
  // Same as above with uint16_t source pixels; every entry takes bit_depth.

  void (*put_hevc_epel_16)(int16_t *dst, ptrdiff_t dststride,
                           const uint16_t *src, ptrdiff_t srcstride, int width, int height,
                           int mx, int my, int16_t* mcbuffer, int bit_depth);
  void (*put_hevc_epel_h_16)(int16_t *dst, ptrdiff_t dststride,
                             const uint16_t *src, ptrdiff_t srcstride, int width, int height,
                            int mx, int my, int16_t* mcbuffer, int bit_depth);
  void (*put_hevc_epel_v_16)(int16_t *dst, ptrdiff_t dststride,
                             const uint16_t *src, ptrdiff_t srcstride, int width, int height,
                             int mx, int my, int16_t* mcbuffer, int bit_depth);
  void (*put_hevc_epel_hv_16)(int16_t *dst, ptrdiff_t dststride,
                              const uint16_t *src, ptrdiff_t srcstride, int width, int height,
                              int mx, int my, int16_t* mcbuffer, int bit_depth);

  // 4x4 table; the put_hevc_qpel() wrapper indexes it as [dX][dY]
  void (*put_hevc_qpel_16[4][4])(int16_t *dst, ptrdiff_t dststride,
                                 const uint16_t *src, ptrdiff_t srcstride, int width, int height,
                                 int16_t* mcbuffer, int bit_depth);


  // --- epel / qpel, bit-depth dispatching wrappers ---
  // Defined inline below the struct: `bit_depth <= 8` selects the *_8
  // pointer (src is cast to const uint8_t*), otherwise the *_16 pointer
  // (src is cast to const uint16_t*).

  void put_hevc_epel(int16_t *dst, ptrdiff_t dststride,
                     const void *src, ptrdiff_t srcstride, int width, int height,
                     int mx, int my, int16_t* mcbuffer, int bit_depth) const;
  void put_hevc_epel_h(int16_t *dst, ptrdiff_t dststride,
                       const void *src, ptrdiff_t srcstride, int width, int height,
                       int mx, int my, int16_t* mcbuffer, int bit_depth) const;
  void put_hevc_epel_v(int16_t *dst, ptrdiff_t dststride,
                       const void *src, ptrdiff_t srcstride, int width, int height,
                       int mx, int my, int16_t* mcbuffer, int bit_depth) const;
  void put_hevc_epel_hv(int16_t *dst, ptrdiff_t dststride,
                        const void *src, ptrdiff_t srcstride, int width, int height,
                        int mx, int my, int16_t* mcbuffer, int bit_depth) const;

  // dX and dY select the entry put_hevc_qpel_{8,16}[dX][dY]
  void put_hevc_qpel(int16_t *dst, ptrdiff_t dststride,
                     const void *src, ptrdiff_t srcstride, int width, int height,
                     int16_t* mcbuffer, int dX,int dY, int bit_depth) const;


  // --- inverse transforms ---

  // These three write int32_t residuals computed from int16_t coefficients.
  void (*transform_bypass)(int32_t *residual, const int16_t *coeffs, int nT);
  void (*transform_bypass_rdpcm_v)(int32_t *r, const int16_t *coeffs, int nT);
  void (*transform_bypass_rdpcm_h)(int32_t *r, const int16_t *coeffs, int nT);

  // 8 bit

  void (*transform_skip_8)(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t _stride); // no transform
  void (*transform_skip_rdpcm_v_8)(uint8_t *_dst, const int16_t *coeffs, int nT, ptrdiff_t _stride);
  void (*transform_skip_rdpcm_h_8)(uint8_t *_dst, const int16_t *coeffs, int nT, ptrdiff_t _stride);
  void (*transform_4x4_dst_add_8)(uint8_t *dst, const int16_t *coeffs, ptrdiff_t stride); // iDST
  void (*transform_add_8[4])(uint8_t *dst, const int16_t *coeffs, ptrdiff_t stride); // iDCT

  // 9-16 bit
  // (there are no 16-bit counterparts of transform_skip_rdpcm_v/h here; the
  //  uint16_t specializations of those wrappers below only assert(false))

  void (*transform_skip_16)(uint16_t *_dst, const int16_t *coeffs, ptrdiff_t _stride, int bit_depth); // no transform
  void (*transform_4x4_dst_add_16)(uint16_t *dst, const int16_t *coeffs, ptrdiff_t stride, int bit_depth); // iDST
  void (*transform_add_16[4])(uint16_t *dst, const int16_t *coeffs, ptrdiff_t stride, int bit_depth); // iDCT


  // operates in place on an int16_t coefficient buffer
  void (*rotate_coefficients)(int16_t *coeff, int nT);

  // Variants that write the result into an int32_t buffer instead of adding
  // to pixels; add_residual_8/_16 then take such an int32_t buffer `r` and a
  // pixel destination.
  void (*transform_idst_4x4)(int32_t *dst, const int16_t *coeffs, int bdShift, int max_coeff_bits);
  void (*transform_idct_4x4)(int32_t *dst, const int16_t *coeffs, int bdShift, int max_coeff_bits);
  void (*transform_idct_8x8)(int32_t *dst, const int16_t *coeffs, int bdShift, int max_coeff_bits);
  void (*transform_idct_16x16)(int32_t *dst,const int16_t *coeffs,int bdShift, int max_coeff_bits);
  void (*transform_idct_32x32)(int32_t *dst,const int16_t *coeffs,int bdShift, int max_coeff_bits);
  void (*add_residual_8)(uint8_t *dst, ptrdiff_t stride, const int32_t* r, int nT, int bit_depth);
  void (*add_residual_16)(uint16_t *dst,ptrdiff_t stride,const int32_t* r, int nT, int bit_depth);

  // Only declared here; specialized below for uint8_t (-> add_residual_8)
  // and uint16_t (-> add_residual_16).
  template <class pixel_t>
  void add_residual(pixel_t *dst, ptrdiff_t stride, const int32_t* r, int nT, int bit_depth) const;

  void (*rdpcm_v)(int32_t* residual, const int16_t* coeffs, int nT,int tsShift,int bdShift);
  void (*rdpcm_h)(int32_t* residual, const int16_t* coeffs, int nT,int tsShift,int bdShift);

  void (*transform_skip_residual)(int32_t *residual, const int16_t *coeffs, int nT,
                                  int tsShift,int bdShift);


  // Pixel-type dispatching wrappers. Only declared here; each one is
  // explicitly specialized for uint8_t and uint16_t below the struct and
  // forwards to the matching *_8 / *_16 pointer. The 8-bit pointers have no
  // bit_depth parameter, so the uint8_t specializations do not pass it on.
  template <class pixel_t> void transform_skip(pixel_t *dst, const int16_t *coeffs, ptrdiff_t stride, int bit_depth) const;
  template <class pixel_t> void transform_skip_rdpcm_v(pixel_t *dst, const int16_t *coeffs, int nT, ptrdiff_t stride, int bit_depth) const;
  template <class pixel_t> void transform_skip_rdpcm_h(pixel_t *dst, const int16_t *coeffs, int nT, ptrdiff_t stride, int bit_depth) const;
  template <class pixel_t> void transform_4x4_dst_add(pixel_t *dst, const int16_t *coeffs, ptrdiff_t stride, int bit_depth) const;
  // sizeIdx selects the entry transform_add_{8,16}[sizeIdx]
  template <class pixel_t> void transform_add(int sizeIdx, pixel_t *dst, const int16_t *coeffs, ptrdiff_t stride, int bit_depth) const;



  // --- forward transforms ---

  void (*fwd_transform_4x4_dst_8)(int16_t *coeffs, const int16_t* src, ptrdiff_t stride); // fDST

  // indexed with (log2TbSize-2)
  void (*fwd_transform_8[4])     (int16_t *coeffs, const int16_t *src, ptrdiff_t stride); // fDCT


  // forward Hadamard transform (without scaling factor)
  // (4x4,8x8,16x16,32x32) indexed with (log2TbSize-2)
  void (*hadamard_transform_8[4])     (int16_t *coeffs, const int16_t *src, ptrdiff_t stride);
};


/*
template <> inline void acceleration_functions::put_weighted_pred_avg<uint8_t>(uint8_t *_dst, ptrdiff_t dststride,
                                                                               const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride,
                                                                               int width, int height, int bit_depth) { put_weighted_pred_avg_8(_dst,dststride,src1,src2,srcstride,width,height); }
template <> inline void acceleration_functions::put_weighted_pred_avg<uint16_t>(uint16_t *_dst, ptrdiff_t dststride,
                                                                                const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride,
                                                                                int width, int height, int bit_depth) { put_weighted_pred_avg_16(_dst,dststride,src1,src2,
                                                                                                                                                 srcstride,width,height,bit_depth); }

template <> inline void acceleration_functions::put_unweighted_pred<uint8_t>(uint8_t *_dst, ptrdiff_t dststride,
                                                                             const int16_t *src, ptrdiff_t srcstride,
                                                                             int width, int height, int bit_depth) { put_unweighted_pred_8(_dst,dststride,src,srcstride,width,height); }
template <> inline void acceleration_functions::put_unweighted_pred<uint16_t>(uint16_t *_dst, ptrdiff_t dststride,
                                                                              const int16_t *src, ptrdiff_t srcstride,
                                                                              int width, int height, int bit_depth) { put_unweighted_pred_16(_dst,dststride,src,srcstride,width,height,bit_depth); }

template <> inline void acceleration_functions::put_weighted_pred<uint8_t>(uint8_t *_dst, ptrdiff_t dststride,
                                                                           const int16_t *src, ptrdiff_t srcstride,
                                                                           int width, int height,
                                                                           int w,int o,int log2WD, int bit_depth) { put_weighted_pred_8(_dst,dststride,src,srcstride,width,height,w,o,log2WD); }
template <> inline void acceleration_functions::put_weighted_pred<uint16_t>(uint16_t *_dst, ptrdiff_t dststride,
                                                                            const int16_t *src, ptrdiff_t srcstride,
                                                                            int width, int height,
                                                                            int w,int o,int log2WD, int bit_depth) { put_weighted_pred_16(_dst,dststride,src,srcstride,width,height,w,o,log2WD,bit_depth); }

template <> inline void acceleration_functions::put_weighted_bipred<uint8_t>(uint8_t *_dst, ptrdiff_t dststride,
                                                                             const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride,
                                                                             int width, int height,
                                                                             int w1,int o1, int w2,int o2, int log2WD, int bit_depth) { put_weighted_bipred_8(_dst,dststride,src1,src2,srcstride,
                                                                                                                                                              width,height,
                                                                                                                                                              w1,o1,w2,o2,log2WD); }
template <> inline void acceleration_functions::put_weighted_bipred<uint16_t>(uint16_t *_dst, ptrdiff_t dststride,
                                                                              const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride,
                                                                              int width, int height,
                                                                              int w1,int o1, int w2,int o2, int log2WD, int bit_depth) { put_weighted_bipred_16(_dst,dststride,src1,src2,srcstride,
                                                                                                                                                                width,height,
                                                                                                                                                                w1,o1,w2,o2,log2WD,bit_depth); }
*/


// Bit-depth dispatcher for put_weighted_pred_avg.
// Depths above 8 are routed to the 16-bit entry (which also receives
// bit_depth); everything else goes to the 8-bit entry. _dst is reinterpreted
// as the pixel pointer type of the selected entry.
inline void acceleration_functions::put_weighted_pred_avg(void* _dst, ptrdiff_t dststride,
                                                          const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride,
                                                          int width, int height, int bit_depth) const
{
  if (bit_depth > 8) {
    put_weighted_pred_avg_16(static_cast<uint16_t*>(_dst), dststride,
                             src1, src2, srcstride,
                             width, height, bit_depth);
    return;
  }

  put_weighted_pred_avg_8(static_cast<uint8_t*>(_dst), dststride,
                          src1, src2, srcstride,
                          width, height);
}


// Bit-depth dispatcher for put_unweighted_pred.
// bit_depth <= 8 selects the 8-bit entry, any larger depth the 16-bit entry
// (which additionally receives bit_depth).
inline void acceleration_functions::put_unweighted_pred(void* _dst, ptrdiff_t dststride,
                                                        const int16_t *src, ptrdiff_t srcstride,
                                                        int width, int height, int bit_depth) const
{
  const bool eight_bit = (bit_depth <= 8);

  if (eight_bit) {
    uint8_t* const out8 = static_cast<uint8_t*>(_dst);
    put_unweighted_pred_8(out8, dststride, src, srcstride, width, height);
  }
  else {
    uint16_t* const out16 = static_cast<uint16_t*>(_dst);
    put_unweighted_pred_16(out16, dststride, src, srcstride, width, height, bit_depth);
  }
}


// Bit-depth dispatcher for put_weighted_pred.
// Forwards w, o and log2WD unchanged; only the 16-bit entry is also given
// bit_depth.
inline void acceleration_functions::put_weighted_pred(void* _dst, ptrdiff_t dststride,
                                                      const int16_t *src, ptrdiff_t srcstride,
                                                      int width, int height,
                                                      int w,int o,int log2WD, int bit_depth) const
{
  if (bit_depth > 8) {
    put_weighted_pred_16(static_cast<uint16_t*>(_dst), dststride,
                         src, srcstride,
                         width, height,
                         w, o, log2WD, bit_depth);
    return;
  }

  put_weighted_pred_8(static_cast<uint8_t*>(_dst), dststride,
                      src, srcstride,
                      width, height,
                      w, o, log2WD);
}


// Bit-depth dispatcher for put_weighted_bipred.
// Both weight/offset pairs and log2WD are passed through as given; the
// 16-bit entry additionally receives bit_depth.
inline void acceleration_functions::put_weighted_bipred(void* _dst, ptrdiff_t dststride,
                                                        const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride,
                                                        int width, int height,
                                                        int w1,int o1, int w2,int o2, int log2WD, int bit_depth) const
{
  const bool eight_bit = (bit_depth <= 8);

  if (eight_bit) {
    put_weighted_bipred_8(static_cast<uint8_t*>(_dst), dststride,
                          src1, src2, srcstride,
                          width, height,
                          w1, o1, w2, o2, log2WD);
  }
  else {
    put_weighted_bipred_16(static_cast<uint16_t*>(_dst), dststride,
                           src1, src2, srcstride,
                           width, height,
                           w1, o1, w2, o2, log2WD, bit_depth);
  }
}



// Bit-depth dispatcher for put_hevc_epel.
// The 8-bit entry has no bit_depth parameter, so it is passed to the 16-bit
// entry only. src is reinterpreted as the pixel type of the chosen entry.
inline void acceleration_functions::put_hevc_epel(int16_t *dst, ptrdiff_t dststride,
                                                  const void *src, ptrdiff_t srcstride, int width, int height,
                                                  int mx, int my, int16_t* mcbuffer, int bit_depth) const
{
  if (bit_depth > 8) {
    put_hevc_epel_16(dst, dststride,
                     static_cast<const uint16_t*>(src), srcstride,
                     width, height, mx, my, mcbuffer, bit_depth);
    return;
  }

  put_hevc_epel_8(dst, dststride,
                  static_cast<const uint8_t*>(src), srcstride,
                  width, height, mx, my, mcbuffer);
}

// Bit-depth dispatcher for put_hevc_epel_h.
// Unlike put_hevc_epel, both the 8-bit and the 16-bit entry take bit_depth.
inline void acceleration_functions::put_hevc_epel_h(int16_t *dst, ptrdiff_t dststride,
                                                    const void *src, ptrdiff_t srcstride, int width, int height,
                                                    int mx, int my, int16_t* mcbuffer, int bit_depth) const
{
  const bool eight_bit = (bit_depth <= 8);

  if (eight_bit) {
    const uint8_t* const in8 = static_cast<const uint8_t*>(src);
    put_hevc_epel_h_8(dst, dststride, in8, srcstride, width, height, mx, my, mcbuffer, bit_depth);
  }
  else {
    const uint16_t* const in16 = static_cast<const uint16_t*>(src);
    put_hevc_epel_h_16(dst, dststride, in16, srcstride, width, height, mx, my, mcbuffer, bit_depth);
  }
}

// Bit-depth dispatcher for put_hevc_epel_v.
// Both the 8-bit and the 16-bit entry receive bit_depth.
inline void acceleration_functions::put_hevc_epel_v(int16_t *dst, ptrdiff_t dststride,
                                                    const void *src, ptrdiff_t srcstride, int width, int height,
                                                    int mx, int my, int16_t* mcbuffer, int bit_depth) const
{
  if (bit_depth > 8) {
    put_hevc_epel_v_16(dst, dststride,
                       static_cast<const uint16_t*>(src), srcstride,
                       width, height, mx, my, mcbuffer, bit_depth);
    return;
  }

  put_hevc_epel_v_8(dst, dststride,
                    static_cast<const uint8_t*>(src), srcstride,
                    width, height, mx, my, mcbuffer, bit_depth);
}

// Bit-depth dispatcher for put_hevc_epel_hv.
// Both the 8-bit and the 16-bit entry receive bit_depth.
inline void acceleration_functions::put_hevc_epel_hv(int16_t *dst, ptrdiff_t dststride,
                                                     const void *src, ptrdiff_t srcstride, int width, int height,
                                                     int mx, int my, int16_t* mcbuffer, int bit_depth) const
{
  const bool eight_bit = (bit_depth <= 8);

  if (eight_bit) {
    const uint8_t* const in8 = static_cast<const uint8_t*>(src);
    put_hevc_epel_hv_8(dst, dststride, in8, srcstride, width, height, mx, my, mcbuffer, bit_depth);
  }
  else {
    const uint16_t* const in16 = static_cast<const uint16_t*>(src);
    put_hevc_epel_hv_16(dst, dststride, in16, srcstride, width, height, mx, my, mcbuffer, bit_depth);
  }
}

// Bit-depth dispatcher for put_hevc_qpel.
//
// dX and dY select one entry of the 4x4 pointer tables put_hevc_qpel_8 /
// put_hevc_qpel_16 (indexed as [dX][dY]). bit_depth <= 8 uses the 8-bit
// table, whose entries take no bit_depth argument; larger depths use the
// 16-bit table and pass bit_depth along. src is reinterpreted as the pixel
// type of the chosen table.
inline void acceleration_functions::put_hevc_qpel(int16_t *dst, ptrdiff_t dststride,
                                                  const void *src, ptrdiff_t srcstride, int width, int height,
                                                  int16_t* mcbuffer, int dX,int dY, int bit_depth) const
{
  // Both tables are declared as [4][4]; an index outside 0..3 would read
  // past the table and call through a garbage pointer. Catch that in debug
  // builds (the checks compile away under NDEBUG).
  assert(dX >= 0 && dX < 4);
  assert(dY >= 0 && dY < 4);

  if (bit_depth <= 8)
    put_hevc_qpel_8[dX][dY](dst,dststride,static_cast<const uint8_t*>(src),srcstride,width,height,mcbuffer);
  else
    put_hevc_qpel_16[dX][dY](dst,dststride,static_cast<const uint16_t*>(src),srcstride,width,height,mcbuffer, bit_depth);
}

// transform_skip<pixel_t>: forward to the pointer matching the pixel type.
// The 8-bit entry has no bit-depth parameter, so bit_depth is not passed on.
template <>
inline void acceleration_functions::transform_skip<uint8_t>(uint8_t *dst, const int16_t *coeffs,
                                                            ptrdiff_t stride, int bit_depth) const
{
  transform_skip_8(dst, coeffs, stride);
}

// 16-bit pixels: bit_depth is forwarded to the 16-bit entry.
template <>
inline void acceleration_functions::transform_skip<uint16_t>(uint16_t *dst, const int16_t *coeffs,
                                                             ptrdiff_t stride, int bit_depth) const
{
  transform_skip_16(dst, coeffs, stride, bit_depth);
}

// transform_skip_rdpcm_v / _h for 8-bit pixels: debug-check that the caller
// really passed bit_depth == 8, then forward to the 8-bit entry (which takes
// no bit-depth argument).
template <>
inline void acceleration_functions::transform_skip_rdpcm_v<uint8_t>(uint8_t *dst, const int16_t *coeffs,
                                                                    int nT, ptrdiff_t stride, int bit_depth) const
{
  assert(bit_depth==8);
  transform_skip_rdpcm_v_8(dst, coeffs, nT, stride);
}

template <>
inline void acceleration_functions::transform_skip_rdpcm_h<uint8_t>(uint8_t *dst, const int16_t *coeffs,
                                                                    int nT, ptrdiff_t stride, int bit_depth) const
{
  assert(bit_depth==8);
  transform_skip_rdpcm_h_8(dst, coeffs, nT, stride);
}

// transform_skip_rdpcm_v / _h for 16-bit pixels: there is no 16-bit entry to
// forward to, so reaching either of these is treated as a logic error. The
// assert is compiled out under NDEBUG, in which case the call does nothing.
template <>
inline void acceleration_functions::transform_skip_rdpcm_v<uint16_t>(uint16_t *dst, const int16_t *coeffs,
                                                                     int nT, ptrdiff_t stride, int bit_depth) const
{
  assert(false);
}

template <>
inline void acceleration_functions::transform_skip_rdpcm_h<uint16_t>(uint16_t *dst, const int16_t *coeffs,
                                                                     int nT, ptrdiff_t stride, int bit_depth) const
{
  assert(false);
}


// transform_4x4_dst_add<pixel_t>: forward to the pointer matching the pixel
// type. Only the 16-bit entry takes bit_depth.
template <>
inline void acceleration_functions::transform_4x4_dst_add<uint8_t>(uint8_t *dst, const int16_t *coeffs,
                                                                   ptrdiff_t stride, int bit_depth) const
{
  transform_4x4_dst_add_8(dst, coeffs, stride);
}

template <>
inline void acceleration_functions::transform_4x4_dst_add<uint16_t>(uint16_t *dst, const int16_t *coeffs,
                                                                    ptrdiff_t stride, int bit_depth) const
{
  transform_4x4_dst_add_16(dst, coeffs, stride, bit_depth);
}

// transform_add<pixel_t>: pick entry `sizeIdx` from the table matching the
// pixel type and call it. Only the 16-bit entries take bit_depth.
//
// Both tables are declared with 4 entries; a sizeIdx outside 0..3 would read
// past the table and call through a garbage pointer, so that is checked in
// debug builds (the check compiles away under NDEBUG).
template <>
inline void acceleration_functions::transform_add<uint8_t>(int sizeIdx, uint8_t *dst, const int16_t *coeffs,
                                                           ptrdiff_t stride, int bit_depth) const
{
  assert(sizeIdx >= 0 && sizeIdx < 4);
  transform_add_8[sizeIdx](dst,coeffs,stride);
}

template <>
inline void acceleration_functions::transform_add<uint16_t>(int sizeIdx, uint16_t *dst, const int16_t *coeffs,
                                                            ptrdiff_t stride, int bit_depth) const
{
  assert(sizeIdx >= 0 && sizeIdx < 4);
  transform_add_16[sizeIdx](dst,coeffs,stride,bit_depth);
}

template <> inline void acceleration_functions::add_residual(uint8_t *dst,  ptrdiff_t stride, const int32_t* r, int nT, int bit_depth) const { add_residual_8(dst,stride,r,nT,bit_depth); }
template <> inline void acceleration_functions::add_residual(uint16_t *dst, ptrdiff_t stride, const int32_t* r, int nT, int bit_depth) const { add_residual_16(dst,stride,r,nT,bit_depth); }

#endif

Coverage Report

Created: 2022-08-24 06:11

Line	Count	Source (jump to first uncovered line)
1		/*
2		* H.265 video codec.
3		* Copyright (c) 2013-2014 struktur AG, Dirk Farin <farin@struktur.de>
4		*
5		* This file is part of libde265.
6		*
7		* libde265 is free software: you can redistribute it and/or modify
8		* it under the terms of the GNU Lesser General Public License as
9		* published by the Free Software Foundation, either version 3 of
10		* the License, or (at your option) any later version.
11		*
12		* libde265 is distributed in the hope that it will be useful,
13		* but WITHOUT ANY WARRANTY; without even the implied warranty of
14		* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15		* GNU Lesser General Public License for more details.
16		*
17		* You should have received a copy of the GNU Lesser General Public License
18		* along with libde265. If not, see <http://www.gnu.org/licenses/>.
19		*/
20
21		#ifndef DE265_ACCELERATION_H
22		#define DE265_ACCELERATION_H
23
24		#include <stddef.h>
25		#include <stdint.h>
26		#include <assert.h>
27
28
29		struct acceleration_functions
30		{
31		void (*put_weighted_pred_avg_8)(uint8_t *_dst, ptrdiff_t dststride,
32		const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride,
33		int width, int height);
34
35		void (*put_unweighted_pred_8)(uint8_t *_dst, ptrdiff_t dststride,
36		const int16_t *src, ptrdiff_t srcstride,
37		int width, int height);
38
39		void (*put_weighted_pred_8)(uint8_t *_dst, ptrdiff_t dststride,
40		const int16_t *src, ptrdiff_t srcstride,
41		int width, int height,
42		int w,int o,int log2WD);
43		void (*put_weighted_bipred_8)(uint8_t *_dst, ptrdiff_t dststride,
44		const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride,
45		int width, int height,
46		int w1,int o1, int w2,int o2, int log2WD);
47
48
49		void (*put_weighted_pred_avg_16)(uint16_t *_dst, ptrdiff_t dststride,
50		const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride,
51		int width, int height, int bit_depth);
52
53		void (*put_unweighted_pred_16)(uint16_t *_dst, ptrdiff_t dststride,
54		const int16_t *src, ptrdiff_t srcstride,
55		int width, int height, int bit_depth);
56
57		void (*put_weighted_pred_16)(uint16_t *_dst, ptrdiff_t dststride,
58		const int16_t *src, ptrdiff_t srcstride,
59		int width, int height,
60		int w,int o,int log2WD, int bit_depth);
61		void (*put_weighted_bipred_16)(uint16_t *_dst, ptrdiff_t dststride,
62		const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride,
63		int width, int height,
64		int w1,int o1, int w2,int o2, int log2WD, int bit_depth);
65
66
67		void put_weighted_pred_avg(void *_dst, ptrdiff_t dststride,
68		const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride,
69		int width, int height, int bit_depth) const;
70
71		void put_unweighted_pred(void *_dst, ptrdiff_t dststride,
72		const int16_t *src, ptrdiff_t srcstride,
73		int width, int height, int bit_depth) const;
74
75		void put_weighted_pred(void *_dst, ptrdiff_t dststride,
76		const int16_t *src, ptrdiff_t srcstride,
77		int width, int height,
78		int w,int o,int log2WD, int bit_depth) const;
79		void put_weighted_bipred(void *_dst, ptrdiff_t dststride,
80		const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride,
81		int width, int height,
82		int w1,int o1, int w2,int o2, int log2WD, int bit_depth) const;
83
84
85
86
87		void (*put_hevc_epel_8)(int16_t *dst, ptrdiff_t dststride,
88		const uint8_t *src, ptrdiff_t srcstride, int width, int height,
89		int mx, int my, int16_t* mcbuffer);
90		void (*put_hevc_epel_h_8)(int16_t *dst, ptrdiff_t dststride,
91		const uint8_t *src, ptrdiff_t srcstride, int width, int height,
92		int mx, int my, int16_t* mcbuffer, int bit_depth);
93		void (*put_hevc_epel_v_8)(int16_t *dst, ptrdiff_t dststride,
94		const uint8_t *src, ptrdiff_t srcstride, int width, int height,
95		int mx, int my, int16_t* mcbuffer, int bit_depth);
96		void (*put_hevc_epel_hv_8)(int16_t *dst, ptrdiff_t dststride,
97		const uint8_t *src, ptrdiff_t srcstride, int width, int height,
98		int mx, int my, int16_t* mcbuffer, int bit_depth);
99
100		void (*put_hevc_qpel_8[4][4])(int16_t *dst, ptrdiff_t dststride,
101		const uint8_t *src, ptrdiff_t srcstride, int width, int height,
102		int16_t* mcbuffer);
103
104
105		void (*put_hevc_epel_16)(int16_t *dst, ptrdiff_t dststride,
106		const uint16_t *src, ptrdiff_t srcstride, int width, int height,
107		int mx, int my, int16_t* mcbuffer, int bit_depth);
108		void (*put_hevc_epel_h_16)(int16_t *dst, ptrdiff_t dststride,
109		const uint16_t *src, ptrdiff_t srcstride, int width, int height,
110		int mx, int my, int16_t* mcbuffer, int bit_depth);
111		void (*put_hevc_epel_v_16)(int16_t *dst, ptrdiff_t dststride,
112		const uint16_t *src, ptrdiff_t srcstride, int width, int height,
113		int mx, int my, int16_t* mcbuffer, int bit_depth);
114		void (*put_hevc_epel_hv_16)(int16_t *dst, ptrdiff_t dststride,
115		const uint16_t *src, ptrdiff_t srcstride, int width, int height,
116		int mx, int my, int16_t* mcbuffer, int bit_depth);
117
118		void (*put_hevc_qpel_16[4][4])(int16_t *dst, ptrdiff_t dststride,
119		const uint16_t *src, ptrdiff_t srcstride, int width, int height,
120		int16_t* mcbuffer, int bit_depth);
121
122
123		void put_hevc_epel(int16_t *dst, ptrdiff_t dststride,
124		const void *src, ptrdiff_t srcstride, int width, int height,
125		int mx, int my, int16_t* mcbuffer, int bit_depth) const;
126		void put_hevc_epel_h(int16_t *dst, ptrdiff_t dststride,
127		const void *src, ptrdiff_t srcstride, int width, int height,
128		int mx, int my, int16_t* mcbuffer, int bit_depth) const;
129		void put_hevc_epel_v(int16_t *dst, ptrdiff_t dststride,
130		const void *src, ptrdiff_t srcstride, int width, int height,
131		int mx, int my, int16_t* mcbuffer, int bit_depth) const;
132		void put_hevc_epel_hv(int16_t *dst, ptrdiff_t dststride,
133		const void *src, ptrdiff_t srcstride, int width, int height,
134		int mx, int my, int16_t* mcbuffer, int bit_depth) const;
135
136		void put_hevc_qpel(int16_t *dst, ptrdiff_t dststride,
137		const void *src, ptrdiff_t srcstride, int width, int height,
138		int16_t* mcbuffer, int dX,int dY, int bit_depth) const;
139
140
141		// --- inverse transforms ---
142
143		void (*transform_bypass)(int32_t *residual, const int16_t *coeffs, int nT);
144		void (*transform_bypass_rdpcm_v)(int32_t *r, const int16_t *coeffs, int nT);
145		void (*transform_bypass_rdpcm_h)(int32_t *r, const int16_t *coeffs, int nT);
146
147		// 8 bit
148
149		void (*transform_skip_8)(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t _stride); // no transform
150		void (*transform_skip_rdpcm_v_8)(uint8_t *_dst, const int16_t *coeffs, int nT, ptrdiff_t _stride);
151		void (*transform_skip_rdpcm_h_8)(uint8_t *_dst, const int16_t *coeffs, int nT, ptrdiff_t _stride);
152		void (*transform_4x4_dst_add_8)(uint8_t *dst, const int16_t *coeffs, ptrdiff_t stride); // iDST
153		void (*transform_add_8[4])(uint8_t *dst, const int16_t *coeffs, ptrdiff_t stride); // iDCT
154
155		// 9-16 bit
156
157		void (*transform_skip_16)(uint16_t *_dst, const int16_t *coeffs, ptrdiff_t _stride, int bit_depth); // no transform
158		void (*transform_4x4_dst_add_16)(uint16_t *dst, const int16_t *coeffs, ptrdiff_t stride, int bit_depth); // iDST
159		void (*transform_add_16[4])(uint16_t *dst, const int16_t *coeffs, ptrdiff_t stride, int bit_depth); // iDCT
160
161
162		void (*rotate_coefficients)(int16_t *coeff, int nT);
163
164		void (*transform_idst_4x4)(int32_t *dst, const int16_t *coeffs, int bdShift, int max_coeff_bits);
165		void (*transform_idct_4x4)(int32_t *dst, const int16_t *coeffs, int bdShift, int max_coeff_bits);
166		void (*transform_idct_8x8)(int32_t *dst, const int16_t *coeffs, int bdShift, int max_coeff_bits);
167		void (*transform_idct_16x16)(int32_t *dst,const int16_t *coeffs,int bdShift, int max_coeff_bits);
168		void (*transform_idct_32x32)(int32_t *dst,const int16_t *coeffs,int bdShift, int max_coeff_bits);
169		void (*add_residual_8)(uint8_t *dst, ptrdiff_t stride, const int32_t* r, int nT, int bit_depth);
170		void (*add_residual_16)(uint16_t *dst,ptrdiff_t stride,const int32_t* r, int nT, int bit_depth);
171
172		template <class pixel_t>
173		void add_residual(pixel_t *dst, ptrdiff_t stride, const int32_t* r, int nT, int bit_depth) const;
174
175		void (*rdpcm_v)(int32_t* residual, const int16_t* coeffs, int nT,int tsShift,int bdShift);
176		void (*rdpcm_h)(int32_t* residual, const int16_t* coeffs, int nT,int tsShift,int bdShift);
177
178		void (*transform_skip_residual)(int32_t *residual, const int16_t *coeffs, int nT,
179		int tsShift,int bdShift);
180
181
182		template <class pixel_t> void transform_skip(pixel_t *dst, const int16_t *coeffs, ptrdiff_t stride, int bit_depth) const;
183		template <class pixel_t> void transform_skip_rdpcm_v(pixel_t *dst, const int16_t *coeffs, int nT, ptrdiff_t stride, int bit_depth) const;
184		template <class pixel_t> void transform_skip_rdpcm_h(pixel_t *dst, const int16_t *coeffs, int nT, ptrdiff_t stride, int bit_depth) const;
185		template <class pixel_t> void transform_4x4_dst_add(pixel_t *dst, const int16_t *coeffs, ptrdiff_t stride, int bit_depth) const;
186		template <class pixel_t> void transform_add(int sizeIdx, pixel_t *dst, const int16_t *coeffs, ptrdiff_t stride, int bit_depth) const;
187
188
189
190		// --- forward transforms ---
191
// Forward-transform callbacks: read 'src' (with a stride) and write 'coeffs'.
// Pointer declarators restored: these are function pointers (and tables of
// function pointers) whose 'coeffs' argument is an output buffer.
void (*fwd_transform_4x4_dst_8)(int16_t *coeffs, const int16_t* src, ptrdiff_t stride); // fDST

// indexed with (log2TbSize-2)
void (*fwd_transform_8[4]) (int16_t *coeffs, const int16_t *src, ptrdiff_t stride); // fDCT


// forward Hadamard transform (without scaling factor)
// (4x4,8x8,16x16,32x32) indexed with (log2TbSize-2)
void (*hadamard_transform_8[4]) (int16_t *coeffs, const int16_t *src, ptrdiff_t stride);
201		};
202
203
204		/*
205		template <> inline void acceleration_functions::put_weighted_pred_avg<uint8_t>(uint8_t *_dst, ptrdiff_t dststride,
206		const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride,
207		int width, int height, int bit_depth) { put_weighted_pred_avg_8(_dst,dststride,src1,src2,srcstride,width,height); }
208		template <> inline void acceleration_functions::put_weighted_pred_avg<uint16_t>(uint16_t *_dst, ptrdiff_t dststride,
209		const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride,
210		int width, int height, int bit_depth) { put_weighted_pred_avg_16(_dst,dststride,src1,src2,
211		srcstride,width,height,bit_depth); }
212
213		template <> inline void acceleration_functions::put_unweighted_pred<uint8_t>(uint8_t *_dst, ptrdiff_t dststride,
214		const int16_t *src, ptrdiff_t srcstride,
215		int width, int height, int bit_depth) { put_unweighted_pred_8(_dst,dststride,src,srcstride,width,height); }
216		template <> inline void acceleration_functions::put_unweighted_pred<uint16_t>(uint16_t *_dst, ptrdiff_t dststride,
217		const int16_t *src, ptrdiff_t srcstride,
218		int width, int height, int bit_depth) { put_unweighted_pred_16(_dst,dststride,src,srcstride,width,height,bit_depth); }
219
220		template <> inline void acceleration_functions::put_weighted_pred<uint8_t>(uint8_t *_dst, ptrdiff_t dststride,
221		const int16_t *src, ptrdiff_t srcstride,
222		int width, int height,
223		int w,int o,int log2WD, int bit_depth) { put_weighted_pred_8(_dst,dststride,src,srcstride,width,height,w,o,log2WD); }
224		template <> inline void acceleration_functions::put_weighted_pred<uint16_t>(uint16_t *_dst, ptrdiff_t dststride,
225		const int16_t *src, ptrdiff_t srcstride,
226		int width, int height,
227		int w,int o,int log2WD, int bit_depth) { put_weighted_pred_16(_dst,dststride,src,srcstride,width,height,w,o,log2WD,bit_depth); }
228
229		template <> inline void acceleration_functions::put_weighted_bipred<uint8_t>(uint8_t *_dst, ptrdiff_t dststride,
230		const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride,
231		int width, int height,
232		int w1,int o1, int w2,int o2, int log2WD, int bit_depth) { put_weighted_bipred_8(_dst,dststride,src1,src2,srcstride,
233		width,height,
234		w1,o1,w2,o2,log2WD); }
235		template <> inline void acceleration_functions::put_weighted_bipred<uint16_t>(uint16_t *_dst, ptrdiff_t dststride,
236		const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride,
237		int width, int height,
238		int w1,int o1, int w2,int o2, int log2WD, int bit_depth) { put_weighted_bipred_16(_dst,dststride,src1,src2,srcstride,
239		width,height,
240		w1,o1,w2,o2,log2WD,bit_depth); }
241		*/
242
243
244		inline void acceleration_functions::put_weighted_pred_avg(void* _dst, ptrdiff_t dststride,
245		const int16_t src1, const int16_t src2, ptrdiff_t srcstride,
246		int width, int height, int bit_depth) const
247	0	{
248	0	if (bit_depth <= 8)
249	0	put_weighted_pred_avg_8((uint8_t*)_dst,dststride,src1,src2,srcstride,width,height);
250	0	else
251	0	put_weighted_pred_avg_16((uint16_t*)_dst,dststride,src1,src2,srcstride,width,height,bit_depth);
252	0	}
253
254
255		inline void acceleration_functions::put_unweighted_pred(void* _dst, ptrdiff_t dststride,
256		const int16_t *src, ptrdiff_t srcstride,
257		int width, int height, int bit_depth) const
258	0	{
259	0	if (bit_depth <= 8)
260	0	put_unweighted_pred_8((uint8_t*)_dst,dststride,src,srcstride,width,height);
261	0	else
262	0	put_unweighted_pred_16((uint16_t*)_dst,dststride,src,srcstride,width,height,bit_depth);
263	0	}
264
265
266		inline void acceleration_functions::put_weighted_pred(void* _dst, ptrdiff_t dststride,
267		const int16_t *src, ptrdiff_t srcstride,
268		int width, int height,
269		int w,int o,int log2WD, int bit_depth) const
270	0	{
271	0	if (bit_depth <= 8)
272	0	put_weighted_pred_8((uint8_t*)_dst,dststride,src,srcstride,width,height,w,o,log2WD);
273	0	else
274	0	put_weighted_pred_16((uint16_t*)_dst,dststride,src,srcstride,width,height,w,o,log2WD,bit_depth);
275	0	}
276
277
278		inline void acceleration_functions::put_weighted_bipred(void* _dst, ptrdiff_t dststride,
279		const int16_t src1, const int16_t src2, ptrdiff_t srcstride,
280		int width, int height,
281		int w1,int o1, int w2,int o2, int log2WD, int bit_depth) const
282	0	{
283	0	if (bit_depth <= 8)
284	0	put_weighted_bipred_8((uint8_t*)_dst,dststride,src1,src2,srcstride, width,height, w1,o1,w2,o2,log2WD);
285	0	else
286	0	put_weighted_bipred_16((uint16_t*)_dst,dststride,src1,src2,srcstride, width,height, w1,o1,w2,o2,log2WD,bit_depth);
287	0	}
288
289
290
291		inline void acceleration_functions::put_hevc_epel(int16_t *dst, ptrdiff_t dststride,
292		const void *src, ptrdiff_t srcstride, int width, int height,
293		int mx, int my, int16_t* mcbuffer, int bit_depth) const
294	0	{
295	0	if (bit_depth <= 8)
296	0	put_hevc_epel_8(dst,dststride,(const uint8_t*)src,srcstride,width,height,mx,my,mcbuffer);
297	0	else
298	0	put_hevc_epel_16(dst,dststride,(const uint16_t*)src,srcstride,width,height,mx,my,mcbuffer, bit_depth);
299	0	}
300
301		inline void acceleration_functions::put_hevc_epel_h(int16_t *dst, ptrdiff_t dststride,
302		const void *src, ptrdiff_t srcstride, int width, int height,
303		int mx, int my, int16_t* mcbuffer, int bit_depth) const
304	0	{
305	0	if (bit_depth <= 8)
306	0	put_hevc_epel_h_8(dst,dststride,(const uint8_t*)src,srcstride,width,height,mx,my,mcbuffer,bit_depth);
307	0	else
308	0	put_hevc_epel_h_16(dst,dststride,(const uint16_t*)src,srcstride,width,height,mx,my,mcbuffer,bit_depth);
309	0	}
310
311		inline void acceleration_functions::put_hevc_epel_v(int16_t *dst, ptrdiff_t dststride,
312		const void *src, ptrdiff_t srcstride, int width, int height,
313		int mx, int my, int16_t* mcbuffer, int bit_depth) const
314	0	{
315	0	if (bit_depth <= 8)
316	0	put_hevc_epel_v_8(dst,dststride,(const uint8_t*)src,srcstride,width,height,mx,my,mcbuffer,bit_depth);
317	0	else
318	0	put_hevc_epel_v_16(dst,dststride,(const uint16_t*)src,srcstride,width,height,mx,my,mcbuffer, bit_depth);
319	0	}
320
321		inline void acceleration_functions::put_hevc_epel_hv(int16_t *dst, ptrdiff_t dststride,
322		const void *src, ptrdiff_t srcstride, int width, int height,
323		int mx, int my, int16_t* mcbuffer, int bit_depth) const
324	0	{
325	0	if (bit_depth <= 8)
326	0	put_hevc_epel_hv_8(dst,dststride,(const uint8_t*)src,srcstride,width,height,mx,my,mcbuffer,bit_depth);
327	0	else
328	0	put_hevc_epel_hv_16(dst,dststride,(const uint16_t*)src,srcstride,width,height,mx,my,mcbuffer, bit_depth);
329	0	}
330
331		inline void acceleration_functions::put_hevc_qpel(int16_t *dst, ptrdiff_t dststride,
332		const void *src, ptrdiff_t srcstride, int width, int height,
333		int16_t* mcbuffer, int dX,int dY, int bit_depth) const
334	0	{
335	0	if (bit_depth <= 8)
336	0	put_hevc_qpel_8[dX][dY](dst,dststride,(const uint8_t*)src,srcstride,width,height,mcbuffer);
337	0	else
338	0	put_hevc_qpel_16[dX][dY](dst,dststride,(const uint16_t*)src,srcstride,width,height,mcbuffer, bit_depth);
339	0	}
340
341	0	template <> inline void acceleration_functions::transform_skip<uint8_t>(uint8_t dst, const int16_t coeffs,ptrdiff_t stride, int bit_depth) const { transform_skip_8(dst,coeffs,stride); }
342	0	template <> inline void acceleration_functions::transform_skip<uint16_t>(uint16_t dst, const int16_t coeffs, ptrdiff_t stride, int bit_depth) const { transform_skip_16(dst,coeffs,stride, bit_depth); }
343
344	0	template <> inline void acceleration_functions::transform_skip_rdpcm_v<uint8_t>(uint8_t dst, const int16_t coeffs, int nT, ptrdiff_t stride, int bit_depth) const { assert(bit_depth==8); transform_skip_rdpcm_v_8(dst,coeffs,nT,stride); }
345	0	template <> inline void acceleration_functions::transform_skip_rdpcm_h<uint8_t>(uint8_t dst, const int16_t coeffs, int nT, ptrdiff_t stride, int bit_depth) const { assert(bit_depth==8); transform_skip_rdpcm_h_8(dst,coeffs,nT,stride); }
346	0	template <> inline void acceleration_functions::transform_skip_rdpcm_v<uint16_t>(uint16_t dst, const int16_t coeffs, int nT, ptrdiff_t stride, int bit_depth) const { assert(false); /transform_skip_rdpcm_v_8(dst,coeffs,nT,stride);/ }
347	0	template <> inline void acceleration_functions::transform_skip_rdpcm_h<uint16_t>(uint16_t dst, const int16_t coeffs, int nT, ptrdiff_t stride, int bit_depth) const { assert(false); /transform_skip_rdpcm_h_8(dst,coeffs,nT,stride);/ }
348
349
350	0	template <> inline void acceleration_functions::transform_4x4_dst_add<uint8_t>(uint8_t dst, const int16_t coeffs, ptrdiff_t stride,int bit_depth) const { transform_4x4_dst_add_8(dst,coeffs,stride); }
351	0	template <> inline void acceleration_functions::transform_4x4_dst_add<uint16_t>(uint16_t dst, const int16_t coeffs, ptrdiff_t stride,int bit_depth) const { transform_4x4_dst_add_16(dst,coeffs,stride,bit_depth); }
352
353	0	template <> inline void acceleration_functions::transform_add<uint8_t>(int sizeIdx, uint8_t dst, const int16_t coeffs, ptrdiff_t stride, int bit_depth) const { transform_add_8[sizeIdx](dst,coeffs,stride); }
354	0	template <> inline void acceleration_functions::transform_add<uint16_t>(int sizeIdx, uint16_t dst, const int16_t coeffs, ptrdiff_t stride, int bit_depth) const { transform_add_16[sizeIdx](dst,coeffs,stride,bit_depth); }
355
356	0	template <> inline void acceleration_functions::add_residual(uint8_t dst, ptrdiff_t stride, const int32_t r, int nT, int bit_depth) const { add_residual_8(dst,stride,r,nT,bit_depth); }
357	0	template <> inline void acceleration_functions::add_residual(uint16_t dst, ptrdiff_t stride, const int32_t r, int nT, int bit_depth) const { add_residual_16(dst,stride,r,nT,bit_depth); }
358
359		#endif