/src/libde265/libde265/fallback-motion.cc
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * H.265 video codec. |
3 | | * Copyright (c) 2013-2014 struktur AG, Dirk Farin <farin@struktur.de> |
4 | | * |
5 | | * This file is part of libde265. |
6 | | * |
7 | | * libde265 is free software: you can redistribute it and/or modify |
8 | | * it under the terms of the GNU Lesser General Public License as |
9 | | * published by the Free Software Foundation, either version 3 of |
10 | | * the License, or (at your option) any later version. |
11 | | * |
12 | | * libde265 is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | | * GNU Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public License |
18 | | * along with libde265. If not, see <http://www.gnu.org/licenses/>. |
19 | | */ |
20 | | |
21 | | #include "fallback-motion.h" |
22 | | #include "util.h" |
23 | | |
24 | | #if defined(_MSC_VER) || defined(__MINGW32__) |
25 | | # include <malloc.h> |
26 | | #elif defined(HAVE_ALLOCA_H) |
27 | | # include <alloca.h> |
28 | | #endif |
29 | | |
30 | | #include <assert.h> |
31 | | |
32 | | |
33 | | void put_unweighted_pred_8_fallback(uint8_t *dst, ptrdiff_t dststride, |
34 | | const int16_t *src, ptrdiff_t srcstride, |
35 | | int width, int height) |
36 | 0 | { |
37 | 0 | int offset8bit = 32; |
38 | 0 | int shift8bit = 6; |
39 | |
|
40 | 0 | assert((width&1)==0); |
41 | | |
42 | 0 | for (int y=0;y<height;y++) { |
43 | 0 | const int16_t* in = &src[y*srcstride]; |
44 | 0 | uint8_t* out = &dst[y*dststride]; |
45 | |
|
46 | 0 | for (int x=0;x<width;x+=2) { |
47 | 0 | out[0] = Clip1_8bit((in[0] + offset8bit)>>shift8bit); |
48 | 0 | out[1] = Clip1_8bit((in[1] + offset8bit)>>shift8bit); |
49 | 0 | out+=2; in+=2; |
50 | 0 | } |
51 | 0 | } |
52 | 0 | } |
53 | | |
54 | | |
55 | | void put_weighted_pred_8_fallback(uint8_t *dst, ptrdiff_t dststride, |
56 | | const int16_t *src, ptrdiff_t srcstride, |
57 | | int width, int height, |
58 | | int w,int o,int log2WD) |
59 | 1.33M | { |
60 | 1.33M | assert(log2WD>=1); // TODO |
61 | | |
62 | 1.33M | const int rnd = (1<<(log2WD-1)); |
63 | | |
64 | 8.82M | for (int y=0;y<height;y++) { |
65 | 7.48M | const int16_t* in = &src[y*srcstride]; |
66 | 7.48M | uint8_t* out = &dst[y*dststride]; |
67 | | |
68 | 63.2M | for (int x=0;x<width;x++) { |
69 | 55.7M | out[0] = Clip1_8bit(((in[0]*w + rnd)>>log2WD) + o); |
70 | 55.7M | out++; in++; |
71 | 55.7M | } |
72 | 7.48M | } |
73 | 1.33M | } |
74 | | |
75 | | void put_weighted_bipred_8_fallback(uint8_t *dst, ptrdiff_t dststride, |
76 | | const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride, |
77 | | int width, int height, |
78 | | int w1,int o1, int w2,int o2, int log2WD) |
79 | 613k | { |
80 | 613k | assert(log2WD>=1); // TODO |
81 | | |
82 | 613k | const int rnd = ((o1+o2+1) << log2WD); |
83 | | |
84 | 5.65M | for (int y=0;y<height;y++) { |
85 | 5.04M | const int16_t* in1 = &src1[y*srcstride]; |
86 | 5.04M | const int16_t* in2 = &src2[y*srcstride]; |
87 | 5.04M | uint8_t* out = &dst[y*dststride]; |
88 | | |
89 | 67.6M | for (int x=0;x<width;x++) { |
90 | 62.6M | out[0] = Clip1_8bit((in1[0]*w1 + in2[0]*w2 + rnd)>>(log2WD+1)); |
91 | 62.6M | out++; in1++; in2++; |
92 | 62.6M | } |
93 | 5.04M | } |
94 | 613k | } |
95 | | |
96 | | |
97 | | void put_weighted_pred_avg_8_fallback(uint8_t *dst, ptrdiff_t dststride, |
98 | | const int16_t *src1, const int16_t *src2, |
99 | | ptrdiff_t srcstride, int width, |
100 | | int height) |
101 | 0 | { |
102 | 0 | int offset8bit = 64; |
103 | 0 | int shift8bit = 7; |
104 | |
|
105 | 0 | assert((width&1)==0); |
106 | | |
107 | | // I had a special case for 8-pixel parallel, unrolled code, |
108 | | // but I did not see any speedup. |
109 | | |
110 | | #if 0 |
111 | | for (int y=0;y<height;y++) { |
112 | | int16_t* in1 = &src1[y*srcstride]; |
113 | | int16_t* in2 = &src2[y*srcstride]; |
114 | | uint8_t* out = &dst[y*dststride]; |
115 | | |
116 | | for (int x=0;x<width;x++) { |
117 | | out[0] = Clip1_8bit((in1[0] + in2[0] + offset8bit)>>shift8bit); |
118 | | out++; in1++; in2++; |
119 | | } |
120 | | } |
121 | | #endif |
122 | | |
123 | | #if 0 |
124 | | if ((width&7)==0) { |
125 | | for (int y=0;y<height;y++) { |
126 | | int16_t* in1 = &src1[y*srcstride]; |
127 | | int16_t* in2 = &src2[y*srcstride]; |
128 | | uint8_t* out = &dst[y*dststride]; |
129 | | |
130 | | for (int x=0;x<width;x+=8) { |
131 | | out[0] = Clip1_8bit((in1[0] + in2[0] + offset8bit)>>shift8bit); |
132 | | out[1] = Clip1_8bit((in1[1] + in2[1] + offset8bit)>>shift8bit); |
133 | | out[2] = Clip1_8bit((in1[2] + in2[2] + offset8bit)>>shift8bit); |
134 | | out[3] = Clip1_8bit((in1[3] + in2[3] + offset8bit)>>shift8bit); |
135 | | out[4] = Clip1_8bit((in1[4] + in2[4] + offset8bit)>>shift8bit); |
136 | | out[5] = Clip1_8bit((in1[5] + in2[5] + offset8bit)>>shift8bit); |
137 | | out[6] = Clip1_8bit((in1[6] + in2[6] + offset8bit)>>shift8bit); |
138 | | out[7] = Clip1_8bit((in1[7] + in2[7] + offset8bit)>>shift8bit); |
139 | | out+=8; in1+=8; in2+=8; |
140 | | } |
141 | | } |
142 | | } |
143 | | else |
144 | | #endif |
145 | 0 | { |
146 | 0 | for (int y=0;y<height;y++) { |
147 | 0 | const int16_t* in1 = &src1[y*srcstride]; |
148 | 0 | const int16_t* in2 = &src2[y*srcstride]; |
149 | 0 | uint8_t* out = &dst[y*dststride]; |
150 | |
|
151 | 0 | for (int x=0;x<width;x+=2) { |
152 | 0 | out[0] = Clip1_8bit((in1[0] + in2[0] + offset8bit)>>shift8bit); |
153 | 0 | out[1] = Clip1_8bit((in1[1] + in2[1] + offset8bit)>>shift8bit); |
154 | 0 | out+=2; in1+=2; in2+=2; |
155 | 0 | } |
156 | 0 | } |
157 | 0 | } |
158 | 0 | } |
159 | | |
160 | | |
161 | | |
162 | | |
163 | | |
164 | | void put_unweighted_pred_16_fallback(uint16_t *dst, ptrdiff_t dststride, |
165 | | const int16_t *src, ptrdiff_t srcstride, |
166 | | int width, int height, int bit_depth) |
167 | 1.40M | { |
168 | 1.40M | int shift1 = 14-bit_depth; |
169 | 1.40M | int offset1 = 0; |
170 | 1.40M | if (shift1>0) { offset1 = 1<<(shift1-1); } |
171 | | |
172 | 1.40M | assert((width&1)==0); |
173 | | |
174 | 10.5M | for (int y=0;y<height;y++) { |
175 | 9.12M | const int16_t* in = &src[y*srcstride]; |
176 | 9.12M | uint16_t* out = &dst[y*dststride]; |
177 | | |
178 | 50.1M | for (int x=0;x<width;x+=2) { |
179 | 41.0M | out[0] = Clip_BitDepth((in[0] + offset1)>>shift1, bit_depth); |
180 | 41.0M | out[1] = Clip_BitDepth((in[1] + offset1)>>shift1, bit_depth); |
181 | 41.0M | out+=2; in+=2; |
182 | 41.0M | } |
183 | 9.12M | } |
184 | 1.40M | } |
185 | | |
186 | | #include <stdlib.h> |
187 | | |
188 | | void put_weighted_pred_16_fallback(uint16_t *dst, ptrdiff_t dststride, |
189 | | const int16_t *src, ptrdiff_t srcstride, |
190 | | int width, int height, |
191 | | int w,int o,int log2WD, int bit_depth) |
192 | 935k | { |
193 | 935k | assert(log2WD>=1); // TODO |
194 | | |
195 | 935k | const int rnd = (1<<(log2WD-1)); |
196 | | |
197 | 5.99M | for (int y=0;y<height;y++) { |
198 | 5.06M | const int16_t* in = &src[y*srcstride]; |
199 | 5.06M | uint16_t* out = &dst[y*dststride]; |
200 | | |
201 | 41.5M | for (int x=0;x<width;x++) { |
202 | 36.4M | out[0] = Clip_BitDepth(((in[0]*w + rnd)>>log2WD) + o, bit_depth); |
203 | 36.4M | out++; in++; |
204 | 36.4M | } |
205 | 5.06M | } |
206 | 935k | } |
207 | | |
208 | | void put_weighted_bipred_16_fallback(uint16_t *dst, ptrdiff_t dststride, |
209 | | const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride, |
210 | | int width, int height, |
211 | | int w1,int o1, int w2,int o2, int log2WD, int bit_depth) |
212 | 292k | { |
213 | 292k | assert(log2WD>=1); // TODO |
214 | | |
215 | 292k | const int rnd = ((o1+o2+1) << log2WD); |
216 | | |
217 | 2.53M | for (int y=0;y<height;y++) { |
218 | 2.24M | const int16_t* in1 = &src1[y*srcstride]; |
219 | 2.24M | const int16_t* in2 = &src2[y*srcstride]; |
220 | 2.24M | uint16_t* out = &dst[y*dststride]; |
221 | | |
222 | 28.3M | for (int x=0;x<width;x++) { |
223 | 26.0M | out[0] = Clip_BitDepth((in1[0]*w1 + in2[0]*w2 + rnd)>>(log2WD+1), bit_depth); |
224 | 26.0M | out++; in1++; in2++; |
225 | 26.0M | } |
226 | 2.24M | } |
227 | 292k | } |
228 | | |
229 | | |
230 | | void put_weighted_pred_avg_16_fallback(uint16_t *dst, ptrdiff_t dststride, |
231 | | const int16_t *src1, const int16_t *src2, |
232 | | ptrdiff_t srcstride, int width, |
233 | | int height, int bit_depth) |
234 | 575k | { |
235 | 575k | int shift2 = 15-bit_depth; |
236 | 575k | int offset2 = 1<<(shift2-1); |
237 | | |
238 | 575k | assert((width&1)==0); |
239 | | |
240 | 4.99M | for (int y=0;y<height;y++) { |
241 | 4.42M | const int16_t* in1 = &src1[y*srcstride]; |
242 | 4.42M | const int16_t* in2 = &src2[y*srcstride]; |
243 | 4.42M | uint16_t* out = &dst[y*dststride]; |
244 | | |
245 | 29.6M | for (int x=0;x<width;x+=2) { |
246 | 25.2M | out[0] = Clip_BitDepth((in1[0] + in2[0] + offset2)>>shift2, bit_depth); |
247 | 25.2M | out[1] = Clip_BitDepth((in1[1] + in2[1] + offset2)>>shift2, bit_depth); |
248 | 25.2M | out+=2; in1+=2; in2+=2; |
249 | 25.2M | } |
250 | 4.42M | } |
251 | 575k | } |
252 | | |
253 | | |
254 | | |
255 | | |
256 | | |
/* Copies a width x height block of 8-bit pixels into the 16-bit intermediate
 * buffer, scaling every pixel by 2^6 (left shift by 6).
 *
 * out, out_stride : destination samples and destination row pitch (in elements)
 * src, src_stride : source pixels and source row pitch (in elements)
 * mx, my, mcbuffer: not used by this function
 */
void put_epel_8_fallback(int16_t *out, ptrdiff_t out_stride,
                         const uint8_t *src, ptrdiff_t src_stride,
                         int width, int height,
                         int mx, int my, int16_t* mcbuffer)
{
  const int upshift = 6;

  for (int row=0; row<height; row++) {
    const uint8_t* srcRow = src + row*src_stride;
    int16_t*       dstRow = out + row*out_stride;

    for (int col=0; col<width; col++) {
      dstRow[col] = srcRow[col] << upshift;
    }
  }
}
275 | | |
276 | | |
/* Copies a width x height block of 16-bit pixels into the 16-bit intermediate
 * buffer, left-shifting every pixel by (14 - bit_depth).
 *
 * out, out_stride : destination samples and destination row pitch (in elements)
 * src, src_stride : source pixels and source row pitch (in elements)
 * mx, my, mcbuffer: not used by this function
 */
void put_epel_16_fallback(int16_t *out, ptrdiff_t out_stride,
                          const uint16_t *src, ptrdiff_t src_stride,
                          int width, int height,
                          int mx, int my, int16_t* mcbuffer, int bit_depth)
{
  const int upshift = 14 - bit_depth;

  for (int row=0; row<height; row++) {
    const uint16_t* srcRow = src + row*src_stride;
    int16_t*        dstRow = out + row*out_stride;

    for (int col=0; col<width; col++) {
      dstRow[col] = srcRow[col] << upshift;
    }
  }
}
295 | | |
296 | | |
297 | | template <class pixel_t> |
298 | | void put_epel_hv_fallback(int16_t *dst, ptrdiff_t dst_stride, |
299 | | const pixel_t *src, ptrdiff_t src_stride, |
300 | | int nPbWC, int nPbHC, |
301 | | int xFracC, int yFracC, int16_t* mcbuffer, int bit_depth) |
302 | 1.17M | { |
303 | 1.17M | const int shift1 = bit_depth-8; |
304 | 1.17M | const int shift2 = 6; |
305 | | //const int shift3 = 6; |
306 | | |
307 | 1.17M | int extra_left = 1; |
308 | 1.17M | int extra_top = 1; |
309 | | // int extra_right = 2; |
310 | 1.17M | int extra_bottom= 2; |
311 | | |
312 | | |
313 | 1.17M | int nPbH_extra = extra_top + nPbHC + extra_bottom; |
314 | | |
315 | 1.17M | int16_t* tmp2buf = (int16_t*)alloca( nPbWC * nPbH_extra * sizeof(int16_t) ); |
316 | | |
317 | | /* |
318 | | int nPbW_extra = extra_left + nPbWC + extra_right; |
319 | | |
320 | | |
321 | | printf("x,y FracC: %d/%d\n",xFracC,yFracC); |
322 | | |
323 | | printf("---IN---\n"); |
324 | | |
325 | | for (int y=-extra_top;y<nPbHC+extra_bottom;y++) { |
326 | | uint8_t* p = &src[y*src_stride -extra_left]; |
327 | | |
328 | | for (int x=-extra_left;x<nPbWC+extra_right;x++) { |
329 | | printf("%05d ",*p << 6); |
330 | | p++; |
331 | | } |
332 | | printf("\n"); |
333 | | } |
334 | | */ |
335 | | |
336 | | |
337 | | // H-filters |
338 | | |
339 | 1.17M | logtrace(LogMotion,"---H---\n"); |
340 | | //printf("---H---(%d)\n",xFracC); |
341 | | |
342 | 10.8M | for (int y=-extra_top;y<nPbHC+extra_bottom;y++) { |
343 | 9.62M | const pixel_t* p = &src[y*src_stride - extra_left]; |
344 | | |
345 | 67.6M | for (int x=0;x<nPbWC;x++) { |
346 | 58.0M | int16_t v; |
347 | 58.0M | switch (xFracC) { |
348 | 11.3M | case 0: v = p[1]; break; |
349 | 8.60M | case 1: v = (-2*p[0]+58*p[1]+10*p[2]-2*p[3])>>shift1; break; |
350 | 7.95M | case 2: v = (-4*p[0]+54*p[1]+16*p[2]-2*p[3])>>shift1; break; |
351 | 3.82M | case 3: v = (-6*p[0]+46*p[1]+28*p[2]-4*p[3])>>shift1; break; |
352 | 5.52M | case 4: v = (-4*p[0]+36*p[1]+36*p[2]-4*p[3])>>shift1; break; |
353 | 3.57M | case 5: v = (-4*p[0]+28*p[1]+46*p[2]-6*p[3])>>shift1; break; |
354 | 7.26M | case 6: v = (-2*p[0]+16*p[1]+54*p[2]-4*p[3])>>shift1; break; |
355 | 0 | default: |
356 | 9.92M | case 7: v = (-2*p[0]+10*p[1]+58*p[2]-2*p[3])>>shift1; break; |
357 | 58.0M | } |
358 | | |
359 | | //printf("%d %d %d %d -> %d\n",p[0],p[1],p[2],p[3],v); |
360 | | |
361 | 58.0M | tmp2buf[y+extra_top + x*nPbH_extra] = v; |
362 | 58.0M | p++; |
363 | | |
364 | | //printf("%05d ",tmp2buf[y+extra_top + x*nPbH_extra]); |
365 | 58.0M | } |
366 | | //printf("\n"); |
367 | 9.62M | } |
368 | | |
369 | | // V-filters |
370 | | |
371 | 1.17M | int vshift = (xFracC==0 ? shift1 : shift2); |
372 | | |
373 | 6.81M | for (int x=0;x<nPbWC;x++) { |
374 | 5.64M | int16_t* p = &tmp2buf[x*nPbH_extra]; |
375 | | |
376 | 46.7M | for (int y=0;y<nPbHC;y++) { |
377 | 41.0M | int16_t v; |
378 | | //logtrace(LogMotion,"%x %x %x %x %x %x %x\n",p[0],p[1],p[2],p[3],p[4],p[5],p[6]); |
379 | | |
380 | 41.0M | switch (yFracC) { |
381 | 7.30M | case 0: v = p[1]; break; |
382 | 5.37M | case 1: v = (-2*p[0]+58*p[1]+10*p[2]-2*p[3])>>vshift; break; |
383 | 6.93M | case 2: v = (-4*p[0]+54*p[1]+16*p[2]-2*p[3])>>vshift; break; |
384 | 3.10M | case 3: v = (-6*p[0]+46*p[1]+28*p[2]-4*p[3])>>vshift; break; |
385 | 5.34M | case 4: v = (-4*p[0]+36*p[1]+36*p[2]-4*p[3])>>vshift; break; |
386 | 2.49M | case 5: v = (-4*p[0]+28*p[1]+46*p[2]-6*p[3])>>vshift; break; |
387 | 6.32M | case 6: v = (-2*p[0]+16*p[1]+54*p[2]-4*p[3])>>vshift; break; |
388 | 0 | default: |
389 | 4.20M | case 7: v = (-2*p[0]+10*p[1]+58*p[2]-2*p[3])>>vshift; break; |
390 | 41.0M | } |
391 | | |
392 | 41.0M | dst[x + y*dst_stride] = v; |
393 | 41.0M | p++; |
394 | 41.0M | } |
395 | | |
396 | 5.64M | } |
397 | | |
398 | | /* |
399 | | printf("---V---\n"); |
400 | | for (int y=0;y<nPbHC;y++) { |
401 | | for (int x=0;x<nPbWC;x++) { |
402 | | printf("%05d ",dst[x+y*dst_stride]); |
403 | | } |
404 | | printf("\n"); |
405 | | } |
406 | | */ |
407 | 1.17M | } Unexecuted instantiation: void put_epel_hv_fallback<unsigned char>(short*, long, unsigned char const*, long, int, int, int, int, short*, int) void put_epel_hv_fallback<unsigned short>(short*, long, unsigned short const*, long, int, int, int, int, short*, int) Line | Count | Source | 302 | 1.17M | { | 303 | 1.17M | const int shift1 = bit_depth-8; | 304 | 1.17M | const int shift2 = 6; | 305 | | //const int shift3 = 6; | 306 | | | 307 | 1.17M | int extra_left = 1; | 308 | 1.17M | int extra_top = 1; | 309 | | // int extra_right = 2; | 310 | 1.17M | int extra_bottom= 2; | 311 | | | 312 | | | 313 | 1.17M | int nPbH_extra = extra_top + nPbHC + extra_bottom; | 314 | | | 315 | 1.17M | int16_t* tmp2buf = (int16_t*)alloca( nPbWC * nPbH_extra * sizeof(int16_t) ); | 316 | | | 317 | | /* | 318 | | int nPbW_extra = extra_left + nPbWC + extra_right; | 319 | | | 320 | | | 321 | | printf("x,y FracC: %d/%d\n",xFracC,yFracC); | 322 | | | 323 | | printf("---IN---\n"); | 324 | | | 325 | | for (int y=-extra_top;y<nPbHC+extra_bottom;y++) { | 326 | | uint8_t* p = &src[y*src_stride -extra_left]; | 327 | | | 328 | | for (int x=-extra_left;x<nPbWC+extra_right;x++) { | 329 | | printf("%05d ",*p << 6); | 330 | | p++; | 331 | | } | 332 | | printf("\n"); | 333 | | } | 334 | | */ | 335 | | | 336 | | | 337 | | // H-filters | 338 | | | 339 | 1.17M | logtrace(LogMotion,"---H---\n"); | 340 | | //printf("---H---(%d)\n",xFracC); | 341 | | | 342 | 10.8M | for (int y=-extra_top;y<nPbHC+extra_bottom;y++) { | 343 | 9.62M | const pixel_t* p = &src[y*src_stride - extra_left]; | 344 | | | 345 | 67.6M | for (int x=0;x<nPbWC;x++) { | 346 | 58.0M | int16_t v; | 347 | 58.0M | switch (xFracC) { | 348 | 11.3M | case 0: v = p[1]; break; | 349 | 8.60M | case 1: v = (-2*p[0]+58*p[1]+10*p[2]-2*p[3])>>shift1; break; | 350 | 7.95M | case 2: v = (-4*p[0]+54*p[1]+16*p[2]-2*p[3])>>shift1; break; | 351 | 3.82M | case 3: v = (-6*p[0]+46*p[1]+28*p[2]-4*p[3])>>shift1; break; | 352 | 5.52M | case 4: v = 
(-4*p[0]+36*p[1]+36*p[2]-4*p[3])>>shift1; break; | 353 | 3.57M | case 5: v = (-4*p[0]+28*p[1]+46*p[2]-6*p[3])>>shift1; break; | 354 | 7.26M | case 6: v = (-2*p[0]+16*p[1]+54*p[2]-4*p[3])>>shift1; break; | 355 | 0 | default: | 356 | 9.92M | case 7: v = (-2*p[0]+10*p[1]+58*p[2]-2*p[3])>>shift1; break; | 357 | 58.0M | } | 358 | | | 359 | | //printf("%d %d %d %d -> %d\n",p[0],p[1],p[2],p[3],v); | 360 | | | 361 | 58.0M | tmp2buf[y+extra_top + x*nPbH_extra] = v; | 362 | 58.0M | p++; | 363 | | | 364 | | //printf("%05d ",tmp2buf[y+extra_top + x*nPbH_extra]); | 365 | 58.0M | } | 366 | | //printf("\n"); | 367 | 9.62M | } | 368 | | | 369 | | // V-filters | 370 | | | 371 | 1.17M | int vshift = (xFracC==0 ? shift1 : shift2); | 372 | | | 373 | 6.81M | for (int x=0;x<nPbWC;x++) { | 374 | 5.64M | int16_t* p = &tmp2buf[x*nPbH_extra]; | 375 | | | 376 | 46.7M | for (int y=0;y<nPbHC;y++) { | 377 | 41.0M | int16_t v; | 378 | | //logtrace(LogMotion,"%x %x %x %x %x %x %x\n",p[0],p[1],p[2],p[3],p[4],p[5],p[6]); | 379 | | | 380 | 41.0M | switch (yFracC) { | 381 | 7.30M | case 0: v = p[1]; break; | 382 | 5.37M | case 1: v = (-2*p[0]+58*p[1]+10*p[2]-2*p[3])>>vshift; break; | 383 | 6.93M | case 2: v = (-4*p[0]+54*p[1]+16*p[2]-2*p[3])>>vshift; break; | 384 | 3.10M | case 3: v = (-6*p[0]+46*p[1]+28*p[2]-4*p[3])>>vshift; break; | 385 | 5.34M | case 4: v = (-4*p[0]+36*p[1]+36*p[2]-4*p[3])>>vshift; break; | 386 | 2.49M | case 5: v = (-4*p[0]+28*p[1]+46*p[2]-6*p[3])>>vshift; break; | 387 | 6.32M | case 6: v = (-2*p[0]+16*p[1]+54*p[2]-4*p[3])>>vshift; break; | 388 | 0 | default: | 389 | 4.20M | case 7: v = (-2*p[0]+10*p[1]+58*p[2]-2*p[3])>>vshift; break; | 390 | 41.0M | } | 391 | | | 392 | 41.0M | dst[x + y*dst_stride] = v; | 393 | 41.0M | p++; | 394 | 41.0M | } | 395 | | | 396 | 5.64M | } | 397 | | | 398 | | /* | 399 | | printf("---V---\n"); | 400 | | for (int y=0;y<nPbHC;y++) { | 401 | | for (int x=0;x<nPbWC;x++) { | 402 | | printf("%05d ",dst[x+y*dst_stride]); | 403 | | } | 404 | | printf("\n"); 
| 405 | | } | 406 | | */ | 407 | 1.17M | } |
|
408 | | |
409 | | |
410 | | template |
411 | | void put_epel_hv_fallback<uint8_t>(int16_t *dst, ptrdiff_t dst_stride, |
412 | | const uint8_t *src, ptrdiff_t src_stride, |
413 | | int nPbWC, int nPbHC, |
414 | | int xFracC, int yFracC, int16_t* mcbuffer, int bit_depth); |
415 | | template |
416 | | void put_epel_hv_fallback<uint16_t>(int16_t *dst, ptrdiff_t dst_stride, |
417 | | const uint16_t *src, ptrdiff_t src_stride, |
418 | | int nPbWC, int nPbHC, |
419 | | int xFracC, int yFracC, int16_t* mcbuffer, int bit_depth); |
420 | | |
421 | | |
422 | | |
/* Full-pel case for 8-bit pixels: copies an nPbW x nPbH block into the
 * 16-bit intermediate buffer, left-shifting every pixel by 6.
 *
 * Pixels are handled in groups of four: each group is read completely before
 * it is written, and the last group of a row is always written in full, so
 * nPbW is effectively rounded up to a multiple of 4.
 *
 * out, out_stride : destination samples and destination row pitch (in elements)
 * src, srcstride  : source pixels and source row pitch (in elements)
 * mcbuffer        : not used by this function
 */
void put_qpel_0_0_fallback(int16_t *out, ptrdiff_t out_stride,
                           const uint8_t *src, ptrdiff_t srcstride,
                           int nPbW, int nPbH, int16_t* mcbuffer)
{
  const int upshift = 6;

  for (int row=0; row<nPbH; row++) {
    const uint8_t* in = src + srcstride*row;
    int16_t* dstRow   = out + out_stride*row;

    for (int x=0; x<nPbW; x+=4, in+=4, dstRow+=4) {
      int16_t quad[4];

      for (int k=0; k<4; k++) {
        quad[k] = in[k] << upshift;
      }

      for (int k=0; k<4; k++) {
        dstRow[k] = quad[k];
      }
    }
  }
}
454 | | |
455 | | |
/* Full-pel case for 16-bit pixel storage: copies an nPbW x nPbH block into
 * the 16-bit intermediate buffer, left-shifting every pixel by
 * (14 - bit_depth).
 *
 * out, out_stride : destination samples and destination row pitch (in elements)
 * src, srcstride  : source pixels and source row pitch (in elements)
 * mcbuffer        : not used by this function
 */
void put_qpel_0_0_fallback_16(int16_t *out, ptrdiff_t out_stride,
                              const uint16_t *src, ptrdiff_t srcstride,
                              int nPbW, int nPbH, int16_t* mcbuffer, int bit_depth)
{
  const int upshift = 14-bit_depth;

  for (int row=0; row<nPbH; row++) {
    const uint16_t* srcRow = src + srcstride*row;
    int16_t*        dstRow = out + out_stride*row;

    for (int col=0; col<nPbW; col++) {
      dstRow[col] = srcRow[col] << upshift;
    }
  }
}
475 | | |
476 | | |
477 | | |
// Number of additional samples read before (extra_before) and after
// (extra_after) the block, indexed by the fractional position (0..3).
// These are read-only lookup tables, hence const.
static const int extra_before[4] = { 0,3,3,2 };
static const int extra_after [4] = { 0,3,4,4 };
480 | | |
481 | | template <class pixel_t> |
482 | | void put_qpel_fallback(int16_t *out, ptrdiff_t out_stride, |
483 | | const pixel_t *src, ptrdiff_t srcstride, |
484 | | int nPbW, int nPbH, int16_t* mcbuffer, |
485 | | int xFracL, int yFracL, int bit_depth) |
486 | 625k | { |
487 | 625k | int extra_left = extra_before[xFracL]; |
488 | | //int extra_right = extra_after [xFracL]; |
489 | 625k | int extra_top = extra_before[yFracL]; |
490 | 625k | int extra_bottom = extra_after [yFracL]; |
491 | | |
492 | | //int nPbW_extra = extra_left + nPbW + extra_right; |
493 | 625k | int nPbH_extra = extra_top + nPbH + extra_bottom; |
494 | | |
495 | 625k | const int shift1 = bit_depth-8; |
496 | 625k | const int shift2 = 6; |
497 | | |
498 | | |
499 | | // H-filters |
500 | | |
501 | 625k | switch (xFracL) { |
502 | 146k | case 0: |
503 | 2.40M | for (int y=-extra_top;y<nPbH+extra_bottom;y++) { |
504 | 2.26M | const pixel_t* p = src + srcstride*y - extra_left; |
505 | 2.26M | int16_t* o = &mcbuffer[y+extra_top]; |
506 | | |
507 | 29.3M | for (int x=0;x<nPbW;x++) { |
508 | 27.0M | *o = *p; |
509 | 27.0M | o += nPbH_extra; |
510 | 27.0M | p++; |
511 | 27.0M | } |
512 | 2.26M | } |
513 | 146k | break; |
514 | 163k | case 1: |
515 | 2.39M | for (int y=-extra_top;y<nPbH+extra_bottom;y++) { |
516 | 2.22M | const pixel_t* p = src + srcstride*y - extra_left; |
517 | 2.22M | int16_t* o = &mcbuffer[y+extra_top]; |
518 | | |
519 | 28.4M | for (int x=0;x<nPbW;x++) { |
520 | 26.1M | *o = (-p[0]+4*p[1]-10*p[2]+58*p[3]+17*p[4] -5*p[5] +p[6])>>shift1; |
521 | 26.1M | o += nPbH_extra; |
522 | 26.1M | p++; |
523 | 26.1M | } |
524 | 2.22M | } |
525 | 163k | break; |
526 | 135k | case 2: |
527 | 2.05M | for (int y=-extra_top;y<nPbH+extra_bottom;y++) { |
528 | 1.91M | const pixel_t* p = src + srcstride*y - extra_left; |
529 | 1.91M | int16_t* o = &mcbuffer[y+extra_top]; |
530 | | |
531 | 23.6M | for (int x=0;x<nPbW;x++) { |
532 | 21.6M | *o = (-p[0]+4*p[1]-11*p[2]+40*p[3]+40*p[4]-11*p[5]+4*p[6]-p[7])>>shift1; |
533 | 21.6M | o += nPbH_extra; |
534 | 21.6M | p++; |
535 | 21.6M | } |
536 | 1.91M | } |
537 | 135k | break; |
538 | 179k | case 3: |
539 | 2.66M | for (int y=-extra_top;y<nPbH+extra_bottom;y++) { |
540 | 2.48M | const pixel_t* p = src + srcstride*y - extra_left; |
541 | 2.48M | int16_t* o = &mcbuffer[y+extra_top]; |
542 | | |
543 | 32.5M | for (int x=0;x<nPbW;x++) { |
544 | 30.1M | *o = ( p[0]-5*p[1]+17*p[2]+58*p[3]-10*p[4] +4*p[5] -p[6])>>shift1; |
545 | 30.1M | o += nPbH_extra; |
546 | 30.1M | p++; |
547 | 30.1M | } |
548 | 2.48M | } |
549 | 179k | break; |
550 | 625k | } |
551 | | |
552 | | |
553 | 625k | logtrace(LogMotion,"---H---\n"); |
554 | | |
555 | 9.51M | for (int y=-extra_top;y<nPbH+extra_bottom;y++) { |
556 | 113M | for (int x=0;x<nPbW;x++) { |
557 | 105M | logtrace(LogMotion,"%04x ",mcbuffer[y+extra_top + x*nPbH_extra]); |
558 | 105M | } |
559 | 8.89M | logtrace(LogMotion,"\n"); |
560 | 8.89M | } |
561 | | |
562 | | // V-filters |
563 | | |
564 | 625k | int vshift = (xFracL==0 ? shift1 : shift2); |
565 | | |
566 | 625k | switch (yFracL) { |
567 | 127k | case 0: |
568 | 1.31M | for (int x=0;x<nPbW;x++) { |
569 | 1.18M | const int16_t* p = &mcbuffer[x*nPbH_extra]; |
570 | 1.18M | int16_t* o = &out[x]; |
571 | | |
572 | 15.2M | for (int y=0;y<nPbH;y++) { |
573 | 14.0M | *o = *p; |
574 | 14.0M | o+=out_stride; |
575 | 14.0M | p++; |
576 | 14.0M | } |
577 | 1.18M | } |
578 | 127k | break; |
579 | 198k | case 1: |
580 | 2.14M | for (int x=0;x<nPbW;x++) { |
581 | 1.94M | const int16_t* p = &mcbuffer[x*nPbH_extra]; |
582 | 1.94M | int16_t* o = &out[x]; |
583 | | |
584 | 24.9M | for (int y=0;y<nPbH;y++) { |
585 | 22.9M | *o = (-p[0]+4*p[1]-10*p[2]+58*p[3]+17*p[4] -5*p[5] +p[6])>>vshift; |
586 | 22.9M | o+=out_stride; |
587 | 22.9M | p++; |
588 | 22.9M | } |
589 | 1.94M | } |
590 | 198k | break; |
591 | 125k | case 2: |
592 | 1.39M | for (int x=0;x<nPbW;x++) { |
593 | 1.27M | const int16_t* p = &mcbuffer[x*nPbH_extra]; |
594 | 1.27M | int16_t* o = &out[x]; |
595 | | |
596 | 17.8M | for (int y=0;y<nPbH;y++) { |
597 | 16.6M | *o = (-p[0]+4*p[1]-11*p[2]+40*p[3]+40*p[4]-11*p[5]+4*p[6]-p[7])>>vshift; |
598 | 16.6M | o+=out_stride; |
599 | 16.6M | p++; |
600 | 16.6M | } |
601 | 1.27M | } |
602 | 125k | break; |
603 | 173k | case 3: |
604 | 1.86M | for (int x=0;x<nPbW;x++) { |
605 | 1.68M | const int16_t* p = &mcbuffer[x*nPbH_extra]; |
606 | 1.68M | int16_t* o = &out[x]; |
607 | | |
608 | 22.3M | for (int y=0;y<nPbH;y++) { |
609 | 20.6M | *o = ( p[0]-5*p[1]+17*p[2]+58*p[3]-10*p[4] +4*p[5] -p[6])>>vshift; |
610 | 20.6M | o+=out_stride; |
611 | 20.6M | p++; |
612 | 20.6M | } |
613 | 1.68M | } |
614 | 173k | break; |
615 | 625k | } |
616 | | |
617 | | |
618 | 625k | logtrace(LogMotion,"---V---\n"); |
619 | 6.40M | for (int y=0;y<nPbH;y++) { |
620 | 80.0M | for (int x=0;x<nPbW;x++) { |
621 | 74.3M | logtrace(LogMotion,"%04x ",out[x+y*out_stride]); |
622 | 74.3M | } |
623 | 5.77M | logtrace(LogMotion,"\n"); |
624 | 5.77M | } |
625 | 625k | } Unexecuted instantiation: void put_qpel_fallback<unsigned char>(short*, long, unsigned char const*, long, int, int, short*, int, int, int) void put_qpel_fallback<unsigned short>(short*, long, unsigned short const*, long, int, int, short*, int, int, int) Line | Count | Source | 486 | 625k | { | 487 | 625k | int extra_left = extra_before[xFracL]; | 488 | | //int extra_right = extra_after [xFracL]; | 489 | 625k | int extra_top = extra_before[yFracL]; | 490 | 625k | int extra_bottom = extra_after [yFracL]; | 491 | | | 492 | | //int nPbW_extra = extra_left + nPbW + extra_right; | 493 | 625k | int nPbH_extra = extra_top + nPbH + extra_bottom; | 494 | | | 495 | 625k | const int shift1 = bit_depth-8; | 496 | 625k | const int shift2 = 6; | 497 | | | 498 | | | 499 | | // H-filters | 500 | | | 501 | 625k | switch (xFracL) { | 502 | 146k | case 0: | 503 | 2.40M | for (int y=-extra_top;y<nPbH+extra_bottom;y++) { | 504 | 2.26M | const pixel_t* p = src + srcstride*y - extra_left; | 505 | 2.26M | int16_t* o = &mcbuffer[y+extra_top]; | 506 | | | 507 | 29.3M | for (int x=0;x<nPbW;x++) { | 508 | 27.0M | *o = *p; | 509 | 27.0M | o += nPbH_extra; | 510 | 27.0M | p++; | 511 | 27.0M | } | 512 | 2.26M | } | 513 | 146k | break; | 514 | 163k | case 1: | 515 | 2.39M | for (int y=-extra_top;y<nPbH+extra_bottom;y++) { | 516 | 2.22M | const pixel_t* p = src + srcstride*y - extra_left; | 517 | 2.22M | int16_t* o = &mcbuffer[y+extra_top]; | 518 | | | 519 | 28.4M | for (int x=0;x<nPbW;x++) { | 520 | 26.1M | *o = (-p[0]+4*p[1]-10*p[2]+58*p[3]+17*p[4] -5*p[5] +p[6])>>shift1; | 521 | 26.1M | o += nPbH_extra; | 522 | 26.1M | p++; | 523 | 26.1M | } | 524 | 2.22M | } | 525 | 163k | break; | 526 | 135k | case 2: | 527 | 2.05M | for (int y=-extra_top;y<nPbH+extra_bottom;y++) { | 528 | 1.91M | const pixel_t* p = src + srcstride*y - extra_left; | 529 | 1.91M | int16_t* o = &mcbuffer[y+extra_top]; | 530 | | | 531 | 23.6M | for (int x=0;x<nPbW;x++) { | 532 | 21.6M | *o = 
(-p[0]+4*p[1]-11*p[2]+40*p[3]+40*p[4]-11*p[5]+4*p[6]-p[7])>>shift1; | 533 | 21.6M | o += nPbH_extra; | 534 | 21.6M | p++; | 535 | 21.6M | } | 536 | 1.91M | } | 537 | 135k | break; | 538 | 179k | case 3: | 539 | 2.66M | for (int y=-extra_top;y<nPbH+extra_bottom;y++) { | 540 | 2.48M | const pixel_t* p = src + srcstride*y - extra_left; | 541 | 2.48M | int16_t* o = &mcbuffer[y+extra_top]; | 542 | | | 543 | 32.5M | for (int x=0;x<nPbW;x++) { | 544 | 30.1M | *o = ( p[0]-5*p[1]+17*p[2]+58*p[3]-10*p[4] +4*p[5] -p[6])>>shift1; | 545 | 30.1M | o += nPbH_extra; | 546 | 30.1M | p++; | 547 | 30.1M | } | 548 | 2.48M | } | 549 | 179k | break; | 550 | 625k | } | 551 | | | 552 | | | 553 | 625k | logtrace(LogMotion,"---H---\n"); | 554 | | | 555 | 9.51M | for (int y=-extra_top;y<nPbH+extra_bottom;y++) { | 556 | 113M | for (int x=0;x<nPbW;x++) { | 557 | 105M | logtrace(LogMotion,"%04x ",mcbuffer[y+extra_top + x*nPbH_extra]); | 558 | 105M | } | 559 | 8.89M | logtrace(LogMotion,"\n"); | 560 | 8.89M | } | 561 | | | 562 | | // V-filters | 563 | | | 564 | 625k | int vshift = (xFracL==0 ? 
shift1 : shift2); | 565 | | | 566 | 625k | switch (yFracL) { | 567 | 127k | case 0: | 568 | 1.31M | for (int x=0;x<nPbW;x++) { | 569 | 1.18M | const int16_t* p = &mcbuffer[x*nPbH_extra]; | 570 | 1.18M | int16_t* o = &out[x]; | 571 | | | 572 | 15.2M | for (int y=0;y<nPbH;y++) { | 573 | 14.0M | *o = *p; | 574 | 14.0M | o+=out_stride; | 575 | 14.0M | p++; | 576 | 14.0M | } | 577 | 1.18M | } | 578 | 127k | break; | 579 | 198k | case 1: | 580 | 2.14M | for (int x=0;x<nPbW;x++) { | 581 | 1.94M | const int16_t* p = &mcbuffer[x*nPbH_extra]; | 582 | 1.94M | int16_t* o = &out[x]; | 583 | | | 584 | 24.9M | for (int y=0;y<nPbH;y++) { | 585 | 22.9M | *o = (-p[0]+4*p[1]-10*p[2]+58*p[3]+17*p[4] -5*p[5] +p[6])>>vshift; | 586 | 22.9M | o+=out_stride; | 587 | 22.9M | p++; | 588 | 22.9M | } | 589 | 1.94M | } | 590 | 198k | break; | 591 | 125k | case 2: | 592 | 1.39M | for (int x=0;x<nPbW;x++) { | 593 | 1.27M | const int16_t* p = &mcbuffer[x*nPbH_extra]; | 594 | 1.27M | int16_t* o = &out[x]; | 595 | | | 596 | 17.8M | for (int y=0;y<nPbH;y++) { | 597 | 16.6M | *o = (-p[0]+4*p[1]-11*p[2]+40*p[3]+40*p[4]-11*p[5]+4*p[6]-p[7])>>vshift; | 598 | 16.6M | o+=out_stride; | 599 | 16.6M | p++; | 600 | 16.6M | } | 601 | 1.27M | } | 602 | 125k | break; | 603 | 173k | case 3: | 604 | 1.86M | for (int x=0;x<nPbW;x++) { | 605 | 1.68M | const int16_t* p = &mcbuffer[x*nPbH_extra]; | 606 | 1.68M | int16_t* o = &out[x]; | 607 | | | 608 | 22.3M | for (int y=0;y<nPbH;y++) { | 609 | 20.6M | *o = ( p[0]-5*p[1]+17*p[2]+58*p[3]-10*p[4] +4*p[5] -p[6])>>vshift; | 610 | 20.6M | o+=out_stride; | 611 | 20.6M | p++; | 612 | 20.6M | } | 613 | 1.68M | } | 614 | 173k | break; | 615 | 625k | } | 616 | | | 617 | | | 618 | 625k | logtrace(LogMotion,"---V---\n"); | 619 | 6.40M | for (int y=0;y<nPbH;y++) { | 620 | 80.0M | for (int x=0;x<nPbW;x++) { | 621 | 74.3M | logtrace(LogMotion,"%04x ",out[x+y*out_stride]); | 622 | 74.3M | } | 623 | 5.77M | logtrace(LogMotion,"\n"); | 624 | 5.77M | } | 625 | 625k | } |
|
626 | | |
627 | | |
628 | | |
// Defines put_qpel_<fx>_<fy>_fallback(): a wrapper for 8-bit pixels that
// forwards to put_qpel_fallback() with the fractional position (fx,fy) and a
// fixed bit depth of 8.
#define QPEL(fx,fy)                                                               \
  void put_qpel_ ## fx ## _ ## fy ## _fallback(int16_t *out, ptrdiff_t out_stride, \
                                               const uint8_t *src, ptrdiff_t srcstride, \
                                               int nPbW, int nPbH, int16_t* mcbuffer) \
  {                                                                               \
    put_qpel_fallback(out,out_stride, src,srcstride, nPbW,nPbH,mcbuffer,fx,fy, 8 ); \
  }
633 | | |
634 | | |
/* Generates put_qpel_<xFrac>_<yFrac>_fallback_16(): a thin wrapper for
 * 16-bit (uint16_t) sources. Unlike the 8-bit variant, the bit depth is
 * supplied by the caller and passed through unchanged as the last argument
 * of put_qpel_fallback().
 * NOTE(review): xFrac/yFrac are presumably the horizontal/vertical
 * quarter-sample offsets (0..3) -- confirm against put_qpel_fallback(). */
#define QPEL16(xFrac,yFrac) \
  void put_qpel_ ## xFrac ## _ ## yFrac ## _fallback_16(int16_t *out, \
                                                        ptrdiff_t out_stride, \
                                                        const uint16_t *src, \
                                                        ptrdiff_t srcstride, \
                                                        int nPbW, int nPbH, \
                                                        int16_t* mcbuffer, \
                                                        int bit_depth) \
  { \
    put_qpel_fallback(out, out_stride, src, srcstride, \
                      nPbW, nPbH, mcbuffer, xFrac, yFrac, bit_depth); \
  }
|
639 | | |
640 | | /* */ QPEL(0,1) QPEL(0,2) QPEL(0,3) |
641 | | QPEL(1,0) QPEL(1,1) QPEL(1,2) QPEL(1,3) |
642 | | QPEL(2,0) QPEL(2,1) QPEL(2,2) QPEL(2,3) |
643 | | QPEL(3,0) QPEL(3,1) QPEL(3,2) QPEL(3,3) |
644 | | |
645 | | /* */ QPEL16(0,1) QPEL16(0,2) QPEL16(0,3) |
646 | | QPEL16(1,0) QPEL16(1,1) QPEL16(1,2) QPEL16(1,3) |
647 | | QPEL16(2,0) QPEL16(2,1) QPEL16(2,2) QPEL16(2,3) |
648 | | QPEL16(3,0) QPEL16(3,1) QPEL16(3,2) QPEL16(3,3) |