/work/libde265/libde265/acceleration.h
Line | Count | Source |
1 | | /* |
2 | | * H.265 video codec. |
3 | | * Copyright (c) 2013-2014 struktur AG, Dirk Farin <farin@struktur.de> |
4 | | * |
5 | | * This file is part of libde265. |
6 | | * |
7 | | * libde265 is free software: you can redistribute it and/or modify |
8 | | * it under the terms of the GNU Lesser General Public License as |
9 | | * published by the Free Software Foundation, either version 3 of |
10 | | * the License, or (at your option) any later version. |
11 | | * |
12 | | * libde265 is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | | * GNU Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public License |
18 | | * along with libde265. If not, see <http://www.gnu.org/licenses/>. |
19 | | */ |
20 | | |
21 | | #ifndef DE265_ACCELERATION_H |
22 | | #define DE265_ACCELERATION_H |
23 | | |
24 | | #include <stddef.h> |
25 | | #include <stdint.h> |
26 | | #include <assert.h> |
27 | | |
28 | | |
// Table of function pointers for the codec's low-level kernels, plus small
// member functions that pick the 8-bit (`_8`) or 16-bit (`_16`) pointer.
// The struct declares no constructor, so the pointers hold whatever the
// owner stored in them; none of the dispatchers checks a pointer for null.
struct acceleration_functions
{
  // --- weighted / unweighted prediction, uint8_t destination ---

  void (*put_weighted_pred_avg_8)(uint8_t *_dst, ptrdiff_t dststride,
                                  const int16_t *src1, const int16_t *src2,
                                  ptrdiff_t srcstride,
                                  int width, int height);

  void (*put_unweighted_pred_8)(uint8_t *_dst, ptrdiff_t dststride,
                                const int16_t *src, ptrdiff_t srcstride,
                                int width, int height);

  void (*put_weighted_pred_8)(uint8_t *_dst, ptrdiff_t dststride,
                              const int16_t *src, ptrdiff_t srcstride,
                              int width, int height,
                              int w, int o, int log2WD);

  void (*put_weighted_bipred_8)(uint8_t *_dst, ptrdiff_t dststride,
                                const int16_t *src1, const int16_t *src2,
                                ptrdiff_t srcstride,
                                int width, int height,
                                int w1, int o1, int w2, int o2, int log2WD);


  // --- weighted / unweighted prediction, uint16_t destination ---
  // Same parameter lists as the 8-bit entries plus a trailing bit_depth.

  void (*put_weighted_pred_avg_16)(uint16_t *_dst, ptrdiff_t dststride,
                                   const int16_t *src1, const int16_t *src2,
                                   ptrdiff_t srcstride,
                                   int width, int height, int bit_depth);

  void (*put_unweighted_pred_16)(uint16_t *_dst, ptrdiff_t dststride,
                                 const int16_t *src, ptrdiff_t srcstride,
                                 int width, int height, int bit_depth);

  void (*put_weighted_pred_16)(uint16_t *_dst, ptrdiff_t dststride,
                               const int16_t *src, ptrdiff_t srcstride,
                               int width, int height,
                               int w, int o, int log2WD, int bit_depth);

  void (*put_weighted_bipred_16)(uint16_t *_dst, ptrdiff_t dststride,
                                 const int16_t *src1, const int16_t *src2,
                                 ptrdiff_t srcstride,
                                 int width, int height,
                                 int w1, int o1, int w2, int o2, int log2WD, int bit_depth);


  // --- prediction dispatchers: choose the _8 or _16 pointer from bit_depth ---

  void put_weighted_pred_avg(void *_dst, ptrdiff_t dststride,
                             const int16_t *src1, const int16_t *src2,
                             ptrdiff_t srcstride,
                             int width, int height, int bit_depth) const;

  void put_unweighted_pred(void *_dst, ptrdiff_t dststride,
                           const int16_t *src, ptrdiff_t srcstride,
                           int width, int height, int bit_depth) const;

  void put_weighted_pred(void *_dst, ptrdiff_t dststride,
                         const int16_t *src, ptrdiff_t srcstride,
                         int width, int height,
                         int w, int o, int log2WD, int bit_depth) const;

  void put_weighted_bipred(void *_dst, ptrdiff_t dststride,
                           const int16_t *src1, const int16_t *src2,
                           ptrdiff_t srcstride,
                           int width, int height,
                           int w1, int o1, int w2, int o2, int log2WD, int bit_depth) const;


  // --- epel / qpel kernels, uint8_t source ---
  // Note: put_hevc_epel_8 takes no bit_depth, unlike its _h/_v/_hv siblings.

  void (*put_hevc_epel_8)(int16_t *dst, ptrdiff_t dststride,
                          const uint8_t *src, ptrdiff_t srcstride,
                          int width, int height,
                          int mx, int my, int16_t* mcbuffer);

  void (*put_hevc_epel_h_8)(int16_t *dst, ptrdiff_t dststride,
                            const uint8_t *src, ptrdiff_t srcstride,
                            int width, int height,
                            int mx, int my, int16_t* mcbuffer, int bit_depth);

  void (*put_hevc_epel_v_8)(int16_t *dst, ptrdiff_t dststride,
                            const uint8_t *src, ptrdiff_t srcstride,
                            int width, int height,
                            int mx, int my, int16_t* mcbuffer, int bit_depth);

  void (*put_hevc_epel_hv_8)(int16_t *dst, ptrdiff_t dststride,
                             const uint8_t *src, ptrdiff_t srcstride,
                             int width, int height,
                             int mx, int my, int16_t* mcbuffer, int bit_depth);

  // 4x4 table; put_hevc_qpel() indexes it as [dX][dY].
  void (*put_hevc_qpel_8[4][4])(int16_t *dst, ptrdiff_t dststride,
                                const uint8_t *src, ptrdiff_t srcstride,
                                int width, int height,
                                int16_t* mcbuffer);


  // --- epel / qpel kernels, uint16_t source ---

  void (*put_hevc_epel_16)(int16_t *dst, ptrdiff_t dststride,
                           const uint16_t *src, ptrdiff_t srcstride,
                           int width, int height,
                           int mx, int my, int16_t* mcbuffer, int bit_depth);

  void (*put_hevc_epel_h_16)(int16_t *dst, ptrdiff_t dststride,
                             const uint16_t *src, ptrdiff_t srcstride,
                             int width, int height,
                             int mx, int my, int16_t* mcbuffer, int bit_depth);

  void (*put_hevc_epel_v_16)(int16_t *dst, ptrdiff_t dststride,
                             const uint16_t *src, ptrdiff_t srcstride,
                             int width, int height,
                             int mx, int my, int16_t* mcbuffer, int bit_depth);

  void (*put_hevc_epel_hv_16)(int16_t *dst, ptrdiff_t dststride,
                              const uint16_t *src, ptrdiff_t srcstride,
                              int width, int height,
                              int mx, int my, int16_t* mcbuffer, int bit_depth);

  // 4x4 table; put_hevc_qpel() indexes it as [dX][dY].
  void (*put_hevc_qpel_16[4][4])(int16_t *dst, ptrdiff_t dststride,
                                 const uint16_t *src, ptrdiff_t srcstride,
                                 int width, int height,
                                 int16_t* mcbuffer, int bit_depth);


  // --- epel / qpel dispatchers: choose the _8 or _16 pointer from bit_depth ---

  void put_hevc_epel(int16_t *dst, ptrdiff_t dststride,
                     const void *src, ptrdiff_t srcstride,
                     int width, int height,
                     int mx, int my, int16_t* mcbuffer, int bit_depth) const;

  void put_hevc_epel_h(int16_t *dst, ptrdiff_t dststride,
                       const void *src, ptrdiff_t srcstride,
                       int width, int height,
                       int mx, int my, int16_t* mcbuffer, int bit_depth) const;

  void put_hevc_epel_v(int16_t *dst, ptrdiff_t dststride,
                       const void *src, ptrdiff_t srcstride,
                       int width, int height,
                       int mx, int my, int16_t* mcbuffer, int bit_depth) const;

  void put_hevc_epel_hv(int16_t *dst, ptrdiff_t dststride,
                        const void *src, ptrdiff_t srcstride,
                        int width, int height,
                        int mx, int my, int16_t* mcbuffer, int bit_depth) const;

  void put_hevc_qpel(int16_t *dst, ptrdiff_t dststride,
                     const void *src, ptrdiff_t srcstride,
                     int width, int height,
                     int16_t* mcbuffer, int dX, int dY, int bit_depth) const;


  // --- inverse transforms ---

  void (*transform_bypass)(int32_t *residual, const int16_t *coeffs, int nT);
  void (*transform_bypass_rdpcm_v)(int32_t *r, const int16_t *coeffs, int nT);
  void (*transform_bypass_rdpcm_h)(int32_t *r, const int16_t *coeffs, int nT);

  // 8 bit

  // no transform
  void (*transform_skip_8)(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t _stride);
  void (*transform_skip_rdpcm_v_8)(uint8_t *_dst, const int16_t *coeffs, int nT, ptrdiff_t _stride);
  void (*transform_skip_rdpcm_h_8)(uint8_t *_dst, const int16_t *coeffs, int nT, ptrdiff_t _stride);
  // iDST
  void (*transform_4x4_dst_add_8)(uint8_t *dst, const int16_t *coeffs, ptrdiff_t stride);
  // iDCT; transform_add() indexes this array with its sizeIdx argument
  void (*transform_add_8[4])(uint8_t *dst, const int16_t *coeffs, ptrdiff_t stride);

  // 9-16 bit

  // no transform
  void (*transform_skip_16)(uint16_t *_dst, const int16_t *coeffs, ptrdiff_t _stride, int bit_depth);
  // iDST
  void (*transform_4x4_dst_add_16)(uint16_t *dst, const int16_t *coeffs, ptrdiff_t stride, int bit_depth);
  // iDCT; transform_add() indexes this array with its sizeIdx argument
  void (*transform_add_16[4])(uint16_t *dst, const int16_t *coeffs, ptrdiff_t stride, int bit_depth);


  void (*rotate_coefficients)(int16_t *coeff, int nT);

  void (*transform_idst_4x4)  (int32_t *dst, const int16_t *coeffs, int bdShift, int max_coeff_bits);
  void (*transform_idct_4x4)  (int32_t *dst, const int16_t *coeffs, int bdShift, int max_coeff_bits);
  void (*transform_idct_8x8)  (int32_t *dst, const int16_t *coeffs, int bdShift, int max_coeff_bits);
  void (*transform_idct_16x16)(int32_t *dst, const int16_t *coeffs, int bdShift, int max_coeff_bits);
  void (*transform_idct_32x32)(int32_t *dst, const int16_t *coeffs, int bdShift, int max_coeff_bits);

  void (*add_residual_8) (uint8_t  *dst, ptrdiff_t stride, const int32_t* r, int nT, int bit_depth);
  void (*add_residual_16)(uint16_t *dst, ptrdiff_t stride, const int32_t* r, int nT, int bit_depth);

  // Typed front end for add_residual_8 / add_residual_16 (specialized for
  // uint8_t and uint16_t outside the struct).
  template <class pixel_t>
  void add_residual(pixel_t *dst, ptrdiff_t stride, const int32_t* r, int nT, int bit_depth) const;

  // Inverse quantization (no scaling list): for each of the nCoeff entries,
  //   coeffBuf[coeffPos[i]] = Clip16( (coeffList[i]*fact + offset) >> bdShift ).
  // Contract: fact small enough that coeffList[i]*fact+offset fits in int32
  // (caller checks this; the rare int64 case stays scalar). bdShift >= 1.
  void (*dequant_coeff_block)(int16_t* coeffBuf, const int16_t* coeffList,
                              const int16_t* coeffPos, int nCoeff,
                              int32_t fact, int32_t offset, int32_t bdShift);

  // --- deblocking (8 bit; one 4-line edge segment) ---

  void (*deblock_luma_8)(uint8_t* ptr, ptrdiff_t stride, int vertical,
                         int dE, int dEp, int dEq, int tc,
                         int filterP, int filterQ);

  void (*deblock_chroma_8)(uint8_t* ptr, ptrdiff_t stride, int vertical,
                           int tc, int filterP, int filterQ);

  void (*rdpcm_v)(int32_t* residual, const int16_t* coeffs, int nT, int tsShift, int bdShift);
  void (*rdpcm_h)(int32_t* residual, const int16_t* coeffs, int nT, int tsShift, int bdShift);

  void (*transform_skip_residual)(int32_t *residual, const int16_t *coeffs, int nT,
                                  int tsShift, int bdShift);


  // Typed front ends for the transform pointers above; pixel_t selects the
  // _8 (uint8_t) or _16 (uint16_t) entry. Specialized outside the struct.
  template <class pixel_t>
  void transform_skip(pixel_t *dst, const int16_t *coeffs, ptrdiff_t stride, int bit_depth) const;

  template <class pixel_t>
  void transform_skip_rdpcm_v(pixel_t *dst, const int16_t *coeffs, int nT, ptrdiff_t stride, int bit_depth) const;

  template <class pixel_t>
  void transform_skip_rdpcm_h(pixel_t *dst, const int16_t *coeffs, int nT, ptrdiff_t stride, int bit_depth) const;

  template <class pixel_t>
  void transform_4x4_dst_add(pixel_t *dst, const int16_t *coeffs, ptrdiff_t stride, int bit_depth) const;

  template <class pixel_t>
  void transform_add(int sizeIdx, pixel_t *dst, const int16_t *coeffs, ptrdiff_t stride, int bit_depth) const;


  // --- intra prediction ---

  void (*intra_pred_dc_8 )(uint8_t*  dst, ptrdiff_t stride, int nT, int cIdx, const uint8_t*  border);
  void (*intra_pred_dc_16)(uint16_t* dst, ptrdiff_t stride, int nT, int cIdx, const uint16_t* border);

  void (*intra_pred_planar_8 )(uint8_t*  dst, ptrdiff_t stride, int nT, int cIdx, const uint8_t*  border);
  void (*intra_pred_planar_16)(uint16_t* dst, ptrdiff_t stride, int nT, int cIdx, const uint16_t* border);

  void (*intra_pred_angular_8 )(uint8_t* dst, ptrdiff_t stride,
                                int bit_depth, int disableBoundaryFilter,
                                int xB0, int yB0, int mode, int nT, int cIdx,
                                const uint8_t* border);

  void (*intra_pred_angular_16)(uint16_t* dst, ptrdiff_t stride,
                                int bit_depth, int disableBoundaryFilter,
                                int xB0, int yB0, int mode, int nT, int cIdx,
                                const uint16_t* border);

  // Typed front ends for the intra prediction pointers above.
  template <class pixel_t>
  void intra_pred_dc(pixel_t* dst, ptrdiff_t stride, int nT, int cIdx, const pixel_t* border) const;

  template <class pixel_t>
  void intra_pred_planar(pixel_t* dst, ptrdiff_t stride, int nT, int cIdx, const pixel_t* border) const;

  template <class pixel_t>
  void intra_pred_angular(pixel_t* dst, ptrdiff_t stride,
                          int bit_depth, int disableBoundaryFilter,
                          int xB0, int yB0, int mode, int nT, int cIdx,
                          const pixel_t* border) const;


  // --- forward transforms ---

  // fDST
  void (*fwd_transform_4x4_dst_8)(int16_t *coeffs, const int16_t* src, ptrdiff_t stride);

  // fDCT, indexed with (log2TbSize-2)
  void (*fwd_transform_8[4])(int16_t *coeffs, const int16_t *src, ptrdiff_t stride);


  // forward Hadamard transform (without scaling factor)
  // (4x4,8x8,16x16,32x32) indexed with (log2TbSize-2)
  void (*hadamard_transform_8[4])(int16_t *coeffs, const int16_t *src, ptrdiff_t stride);
};
232 | | |
233 | | |
234 | | /* |
235 | | template <> inline void acceleration_functions::put_weighted_pred_avg<uint8_t>(uint8_t *_dst, ptrdiff_t dststride, |
236 | | const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride, |
237 | | int width, int height, int bit_depth) { put_weighted_pred_avg_8(_dst,dststride,src1,src2,srcstride,width,height); } |
238 | | template <> inline void acceleration_functions::put_weighted_pred_avg<uint16_t>(uint16_t *_dst, ptrdiff_t dststride, |
239 | | const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride, |
240 | | int width, int height, int bit_depth) { put_weighted_pred_avg_16(_dst,dststride,src1,src2, |
241 | | srcstride,width,height,bit_depth); } |
242 | | |
243 | | template <> inline void acceleration_functions::put_unweighted_pred<uint8_t>(uint8_t *_dst, ptrdiff_t dststride, |
244 | | const int16_t *src, ptrdiff_t srcstride, |
245 | | int width, int height, int bit_depth) { put_unweighted_pred_8(_dst,dststride,src,srcstride,width,height); } |
246 | | template <> inline void acceleration_functions::put_unweighted_pred<uint16_t>(uint16_t *_dst, ptrdiff_t dststride, |
247 | | const int16_t *src, ptrdiff_t srcstride, |
248 | | int width, int height, int bit_depth) { put_unweighted_pred_16(_dst,dststride,src,srcstride,width,height,bit_depth); } |
249 | | |
250 | | template <> inline void acceleration_functions::put_weighted_pred<uint8_t>(uint8_t *_dst, ptrdiff_t dststride, |
251 | | const int16_t *src, ptrdiff_t srcstride, |
252 | | int width, int height, |
253 | | int w,int o,int log2WD, int bit_depth) { put_weighted_pred_8(_dst,dststride,src,srcstride,width,height,w,o,log2WD); } |
254 | | template <> inline void acceleration_functions::put_weighted_pred<uint16_t>(uint16_t *_dst, ptrdiff_t dststride, |
255 | | const int16_t *src, ptrdiff_t srcstride, |
256 | | int width, int height, |
257 | | int w,int o,int log2WD, int bit_depth) { put_weighted_pred_16(_dst,dststride,src,srcstride,width,height,w,o,log2WD,bit_depth); } |
258 | | |
259 | | template <> inline void acceleration_functions::put_weighted_bipred<uint8_t>(uint8_t *_dst, ptrdiff_t dststride, |
260 | | const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride, |
261 | | int width, int height, |
262 | | int w1,int o1, int w2,int o2, int log2WD, int bit_depth) { put_weighted_bipred_8(_dst,dststride,src1,src2,srcstride, |
263 | | width,height, |
264 | | w1,o1,w2,o2,log2WD); } |
265 | | template <> inline void acceleration_functions::put_weighted_bipred<uint16_t>(uint16_t *_dst, ptrdiff_t dststride, |
266 | | const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride, |
267 | | int width, int height, |
268 | | int w1,int o1, int w2,int o2, int log2WD, int bit_depth) { put_weighted_bipred_16(_dst,dststride,src1,src2,srcstride, |
269 | | width,height, |
270 | | w1,o1,w2,o2,log2WD,bit_depth); } |
271 | | */ |
272 | | |
273 | | |
274 | | inline void acceleration_functions::put_weighted_pred_avg(void* _dst, ptrdiff_t dststride, |
275 | | const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride, |
276 | | int width, int height, int bit_depth) const |
277 | 0 | { |
278 | 0 | if (bit_depth <= 8) |
279 | 0 | put_weighted_pred_avg_8((uint8_t*)_dst,dststride,src1,src2,srcstride,width,height); |
280 | 0 | else |
281 | 0 | put_weighted_pred_avg_16((uint16_t*)_dst,dststride,src1,src2,srcstride,width,height,bit_depth); |
282 | 0 | } |
283 | | |
284 | | |
285 | | inline void acceleration_functions::put_unweighted_pred(void* _dst, ptrdiff_t dststride, |
286 | | const int16_t *src, ptrdiff_t srcstride, |
287 | | int width, int height, int bit_depth) const |
288 | 0 | { |
289 | 0 | if (bit_depth <= 8) |
290 | 0 | put_unweighted_pred_8((uint8_t*)_dst,dststride,src,srcstride,width,height); |
291 | 0 | else |
292 | 0 | put_unweighted_pred_16((uint16_t*)_dst,dststride,src,srcstride,width,height,bit_depth); |
293 | 0 | } |
294 | | |
295 | | |
296 | | inline void acceleration_functions::put_weighted_pred(void* _dst, ptrdiff_t dststride, |
297 | | const int16_t *src, ptrdiff_t srcstride, |
298 | | int width, int height, |
299 | | int w,int o,int log2WD, int bit_depth) const |
300 | 0 | { |
301 | 0 | if (bit_depth <= 8) |
302 | 0 | put_weighted_pred_8((uint8_t*)_dst,dststride,src,srcstride,width,height,w,o,log2WD); |
303 | 0 | else |
304 | 0 | put_weighted_pred_16((uint16_t*)_dst,dststride,src,srcstride,width,height,w,o,log2WD,bit_depth); |
305 | 0 | } |
306 | | |
307 | | |
308 | | inline void acceleration_functions::put_weighted_bipred(void* _dst, ptrdiff_t dststride, |
309 | | const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride, |
310 | | int width, int height, |
311 | | int w1,int o1, int w2,int o2, int log2WD, int bit_depth) const |
312 | 0 | { |
313 | 0 | if (bit_depth <= 8) |
314 | 0 | put_weighted_bipred_8((uint8_t*)_dst,dststride,src1,src2,srcstride, width,height, w1,o1,w2,o2,log2WD); |
315 | 0 | else |
316 | 0 | put_weighted_bipred_16((uint16_t*)_dst,dststride,src1,src2,srcstride, width,height, w1,o1,w2,o2,log2WD,bit_depth); |
317 | 0 | } |
318 | | |
319 | | |
320 | | |
321 | | inline void acceleration_functions::put_hevc_epel(int16_t *dst, ptrdiff_t dststride, |
322 | | const void *src, ptrdiff_t srcstride, int width, int height, |
323 | | int mx, int my, int16_t* mcbuffer, int bit_depth) const |
324 | 0 | { |
325 | 0 | if (bit_depth <= 8) |
326 | 0 | put_hevc_epel_8(dst,dststride,(const uint8_t*)src,srcstride,width,height,mx,my,mcbuffer); |
327 | 0 | else |
328 | 0 | put_hevc_epel_16(dst,dststride,(const uint16_t*)src,srcstride,width,height,mx,my,mcbuffer, bit_depth); |
329 | 0 | } |
330 | | |
331 | | inline void acceleration_functions::put_hevc_epel_h(int16_t *dst, ptrdiff_t dststride, |
332 | | const void *src, ptrdiff_t srcstride, int width, int height, |
333 | | int mx, int my, int16_t* mcbuffer, int bit_depth) const |
334 | 0 | { |
335 | 0 | if (bit_depth <= 8) |
336 | 0 | put_hevc_epel_h_8(dst,dststride,(const uint8_t*)src,srcstride,width,height,mx,my,mcbuffer,bit_depth); |
337 | 0 | else |
338 | 0 | put_hevc_epel_h_16(dst,dststride,(const uint16_t*)src,srcstride,width,height,mx,my,mcbuffer,bit_depth); |
339 | 0 | } |
340 | | |
341 | | inline void acceleration_functions::put_hevc_epel_v(int16_t *dst, ptrdiff_t dststride, |
342 | | const void *src, ptrdiff_t srcstride, int width, int height, |
343 | | int mx, int my, int16_t* mcbuffer, int bit_depth) const |
344 | 0 | { |
345 | 0 | if (bit_depth <= 8) |
346 | 0 | put_hevc_epel_v_8(dst,dststride,(const uint8_t*)src,srcstride,width,height,mx,my,mcbuffer,bit_depth); |
347 | 0 | else |
348 | 0 | put_hevc_epel_v_16(dst,dststride,(const uint16_t*)src,srcstride,width,height,mx,my,mcbuffer, bit_depth); |
349 | 0 | } |
350 | | |
351 | | inline void acceleration_functions::put_hevc_epel_hv(int16_t *dst, ptrdiff_t dststride, |
352 | | const void *src, ptrdiff_t srcstride, int width, int height, |
353 | | int mx, int my, int16_t* mcbuffer, int bit_depth) const |
354 | 0 | { |
355 | 0 | if (bit_depth <= 8) |
356 | 0 | put_hevc_epel_hv_8(dst,dststride,(const uint8_t*)src,srcstride,width,height,mx,my,mcbuffer,bit_depth); |
357 | 0 | else |
358 | 0 | put_hevc_epel_hv_16(dst,dststride,(const uint16_t*)src,srcstride,width,height,mx,my,mcbuffer, bit_depth); |
359 | 0 | } |
360 | | |
361 | | inline void acceleration_functions::put_hevc_qpel(int16_t *dst, ptrdiff_t dststride, |
362 | | const void *src, ptrdiff_t srcstride, int width, int height, |
363 | | int16_t* mcbuffer, int dX,int dY, int bit_depth) const |
364 | 0 | { |
365 | 0 | if (bit_depth <= 8) |
366 | 0 | put_hevc_qpel_8[dX][dY](dst,dststride,(const uint8_t*)src,srcstride,width,height,mcbuffer); |
367 | 0 | else |
368 | 0 | put_hevc_qpel_16[dX][dY](dst,dststride,(const uint16_t*)src,srcstride,width,height,mcbuffer, bit_depth); |
369 | 0 | } |
370 | | |
371 | 0 | template <> inline void acceleration_functions::transform_skip<uint8_t>(uint8_t *dst, const int16_t *coeffs,ptrdiff_t stride, int bit_depth) const { transform_skip_8(dst,coeffs,stride); } |
372 | 0 | template <> inline void acceleration_functions::transform_skip<uint16_t>(uint16_t *dst, const int16_t *coeffs, ptrdiff_t stride, int bit_depth) const { transform_skip_16(dst,coeffs,stride, bit_depth); } |
373 | | |
374 | 0 | template <> inline void acceleration_functions::transform_skip_rdpcm_v<uint8_t>(uint8_t *dst, const int16_t *coeffs, int nT, ptrdiff_t stride, int bit_depth) const { assert(bit_depth==8); transform_skip_rdpcm_v_8(dst,coeffs,nT,stride); } |
375 | 0 | template <> inline void acceleration_functions::transform_skip_rdpcm_h<uint8_t>(uint8_t *dst, const int16_t *coeffs, int nT, ptrdiff_t stride, int bit_depth) const { assert(bit_depth==8); transform_skip_rdpcm_h_8(dst,coeffs,nT,stride); } |
376 | 0 | template <> inline void acceleration_functions::transform_skip_rdpcm_v<uint16_t>(uint16_t *dst, const int16_t *coeffs, int nT, ptrdiff_t stride, int bit_depth) const { assert(false); /*transform_skip_rdpcm_v_8(dst,coeffs,nT,stride);*/ } |
377 | 0 | template <> inline void acceleration_functions::transform_skip_rdpcm_h<uint16_t>(uint16_t *dst, const int16_t *coeffs, int nT, ptrdiff_t stride, int bit_depth) const { assert(false); /*transform_skip_rdpcm_h_8(dst,coeffs,nT,stride);*/ } |
378 | | |
379 | | |
380 | 0 | template <> inline void acceleration_functions::transform_4x4_dst_add<uint8_t>(uint8_t *dst, const int16_t *coeffs, ptrdiff_t stride,int bit_depth) const { transform_4x4_dst_add_8(dst,coeffs,stride); } |
381 | 0 | template <> inline void acceleration_functions::transform_4x4_dst_add<uint16_t>(uint16_t *dst, const int16_t *coeffs, ptrdiff_t stride,int bit_depth) const { transform_4x4_dst_add_16(dst,coeffs,stride,bit_depth); } |
382 | | |
383 | 0 | template <> inline void acceleration_functions::transform_add<uint8_t>(int sizeIdx, uint8_t *dst, const int16_t *coeffs, ptrdiff_t stride, int bit_depth) const { transform_add_8[sizeIdx](dst,coeffs,stride); } |
384 | 0 | template <> inline void acceleration_functions::transform_add<uint16_t>(int sizeIdx, uint16_t *dst, const int16_t *coeffs, ptrdiff_t stride, int bit_depth) const { transform_add_16[sizeIdx](dst,coeffs,stride,bit_depth); } |
385 | | |
386 | 0 | template <> inline void acceleration_functions::add_residual(uint8_t *dst, ptrdiff_t stride, const int32_t* r, int nT, int bit_depth) const { add_residual_8(dst,stride,r,nT,bit_depth); } |
387 | 0 | template <> inline void acceleration_functions::add_residual(uint16_t *dst, ptrdiff_t stride, const int32_t* r, int nT, int bit_depth) const { add_residual_16(dst,stride,r,nT,bit_depth); } |
388 | | |
389 | 0 | template <> inline void acceleration_functions::intra_pred_dc<uint8_t> (uint8_t* dst, ptrdiff_t stride, int nT, int cIdx, const uint8_t* border) const { intra_pred_dc_8 (dst,stride,nT,cIdx,border); } |
390 | 0 | template <> inline void acceleration_functions::intra_pred_dc<uint16_t>(uint16_t* dst, ptrdiff_t stride, int nT, int cIdx, const uint16_t* border) const { intra_pred_dc_16(dst,stride,nT,cIdx,border); } |
391 | | |
392 | 0 | template <> inline void acceleration_functions::intra_pred_planar<uint8_t> (uint8_t* dst, ptrdiff_t stride, int nT, int cIdx, const uint8_t* border) const { intra_pred_planar_8 (dst,stride,nT,cIdx,border); } |
393 | 0 | template <> inline void acceleration_functions::intra_pred_planar<uint16_t>(uint16_t* dst, ptrdiff_t stride, int nT, int cIdx, const uint16_t* border) const { intra_pred_planar_16(dst,stride,nT,cIdx,border); } |
394 | | |
395 | 0 | template <> inline void acceleration_functions::intra_pred_angular<uint8_t> (uint8_t* dst, ptrdiff_t stride, int bit_depth, int disableBoundaryFilter, int xB0, int yB0, int mode, int nT, int cIdx, const uint8_t* border) const { intra_pred_angular_8 (dst,stride,bit_depth,disableBoundaryFilter,xB0,yB0,mode,nT,cIdx,border); } |
396 | 0 | template <> inline void acceleration_functions::intra_pred_angular<uint16_t>(uint16_t* dst, ptrdiff_t stride, int bit_depth, int disableBoundaryFilter, int xB0, int yB0, int mode, int nT, int cIdx, const uint16_t* border) const { intra_pred_angular_16(dst,stride,bit_depth,disableBoundaryFilter,xB0,yB0,mode,nT,cIdx,border); } |
397 | | |
398 | | #endif |