/src/libde265/libde265/fallback-motion.cc
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * H.265 video codec. |
3 | | * Copyright (c) 2013-2014 struktur AG, Dirk Farin <farin@struktur.de> |
4 | | * |
5 | | * This file is part of libde265. |
6 | | * |
7 | | * libde265 is free software: you can redistribute it and/or modify |
8 | | * it under the terms of the GNU Lesser General Public License as |
9 | | * published by the Free Software Foundation, either version 3 of |
10 | | * the License, or (at your option) any later version. |
11 | | * |
12 | | * libde265 is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | | * GNU Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public License |
18 | | * along with libde265. If not, see <http://www.gnu.org/licenses/>. |
19 | | */ |
20 | | |
21 | | #include "fallback-motion.h" |
22 | | #include "util.h" |
23 | | |
24 | | #if defined(_MSC_VER) || defined(__MINGW32__) |
25 | | # include <malloc.h> |
26 | | #elif defined(HAVE_ALLOCA_H) |
27 | | # include <alloca.h> |
28 | | #endif |
29 | | |
30 | | #include <assert.h> |
31 | | |
32 | | |
33 | | void put_unweighted_pred_8_fallback(uint8_t *dst, ptrdiff_t dststride, |
34 | | const int16_t *src, ptrdiff_t srcstride, |
35 | | int width, int height) |
36 | 0 | { |
37 | 0 | int offset8bit = 32; |
38 | 0 | int shift8bit = 6; |
39 | |
|
40 | 0 | assert((width&1)==0); |
41 | | |
42 | 0 | for (int y=0;y<height;y++) { |
43 | 0 | const int16_t* in = &src[y*srcstride]; |
44 | 0 | uint8_t* out = &dst[y*dststride]; |
45 | |
|
46 | 0 | for (int x=0;x<width;x+=2) { |
47 | 0 | out[0] = Clip1_8bit((in[0] + offset8bit)>>shift8bit); |
48 | 0 | out[1] = Clip1_8bit((in[1] + offset8bit)>>shift8bit); |
49 | 0 | out+=2; in+=2; |
50 | 0 | } |
51 | 0 | } |
52 | 0 | } |
53 | | |
54 | | |
55 | | void put_weighted_pred_8_fallback(uint8_t *dst, ptrdiff_t dststride, |
56 | | const int16_t *src, ptrdiff_t srcstride, |
57 | | int width, int height, |
58 | | int w,int o,int log2WD) |
59 | 0 | { |
60 | 0 | assert(log2WD>=1); // TODO |
61 | | |
62 | 0 | const int rnd = (1<<(log2WD-1)); |
63 | |
|
64 | 0 | for (int y=0;y<height;y++) { |
65 | 0 | const int16_t* in = &src[y*srcstride]; |
66 | 0 | uint8_t* out = &dst[y*dststride]; |
67 | |
|
68 | 0 | for (int x=0;x<width;x++) { |
69 | 0 | out[0] = Clip1_8bit(((in[0]*w + rnd)>>log2WD) + o); |
70 | 0 | out++; in++; |
71 | 0 | } |
72 | 0 | } |
73 | 0 | } |
74 | | |
75 | | void put_weighted_bipred_8_fallback(uint8_t *dst, ptrdiff_t dststride, |
76 | | const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride, |
77 | | int width, int height, |
78 | | int w1,int o1, int w2,int o2, int log2WD) |
79 | 0 | { |
80 | 0 | assert(log2WD>=1); // TODO |
81 | | |
82 | 0 | const int rnd = ((o1+o2+1) << log2WD); |
83 | |
|
84 | 0 | for (int y=0;y<height;y++) { |
85 | 0 | const int16_t* in1 = &src1[y*srcstride]; |
86 | 0 | const int16_t* in2 = &src2[y*srcstride]; |
87 | 0 | uint8_t* out = &dst[y*dststride]; |
88 | |
|
89 | 0 | for (int x=0;x<width;x++) { |
90 | 0 | out[0] = Clip1_8bit((in1[0]*w1 + in2[0]*w2 + rnd)>>(log2WD+1)); |
91 | 0 | out++; in1++; in2++; |
92 | 0 | } |
93 | 0 | } |
94 | 0 | } |
95 | | |
96 | | |
97 | | void put_weighted_pred_avg_8_fallback(uint8_t *dst, ptrdiff_t dststride, |
98 | | const int16_t *src1, const int16_t *src2, |
99 | | ptrdiff_t srcstride, int width, |
100 | | int height) |
101 | 0 | { |
102 | 0 | int offset8bit = 64; |
103 | 0 | int shift8bit = 7; |
104 | |
|
105 | 0 | assert((width&1)==0); |
106 | | |
107 | | // I had a special case for 8-pixel parallel, unrolled code, |
108 | | // but I did not see any speedup. |
109 | | |
110 | | #if 0 |
111 | | for (int y=0;y<height;y++) { |
112 | | int16_t* in1 = &src1[y*srcstride]; |
113 | | int16_t* in2 = &src2[y*srcstride]; |
114 | | uint8_t* out = &dst[y*dststride]; |
115 | | |
116 | | for (int x=0;x<width;x++) { |
117 | | out[0] = Clip1_8bit((in1[0] + in2[0] + offset8bit)>>shift8bit); |
118 | | out++; in1++; in2++; |
119 | | } |
120 | | } |
121 | | #endif |
122 | | |
123 | | #if 0 |
124 | | if ((width&7)==0) { |
125 | | for (int y=0;y<height;y++) { |
126 | | int16_t* in1 = &src1[y*srcstride]; |
127 | | int16_t* in2 = &src2[y*srcstride]; |
128 | | uint8_t* out = &dst[y*dststride]; |
129 | | |
130 | | for (int x=0;x<width;x+=8) { |
131 | | out[0] = Clip1_8bit((in1[0] + in2[0] + offset8bit)>>shift8bit); |
132 | | out[1] = Clip1_8bit((in1[1] + in2[1] + offset8bit)>>shift8bit); |
133 | | out[2] = Clip1_8bit((in1[2] + in2[2] + offset8bit)>>shift8bit); |
134 | | out[3] = Clip1_8bit((in1[3] + in2[3] + offset8bit)>>shift8bit); |
135 | | out[4] = Clip1_8bit((in1[4] + in2[4] + offset8bit)>>shift8bit); |
136 | | out[5] = Clip1_8bit((in1[5] + in2[5] + offset8bit)>>shift8bit); |
137 | | out[6] = Clip1_8bit((in1[6] + in2[6] + offset8bit)>>shift8bit); |
138 | | out[7] = Clip1_8bit((in1[7] + in2[7] + offset8bit)>>shift8bit); |
139 | | out+=8; in1+=8; in2+=8; |
140 | | } |
141 | | } |
142 | | } |
143 | | else |
144 | | #endif |
145 | 0 | { |
146 | 0 | for (int y=0;y<height;y++) { |
147 | 0 | const int16_t* in1 = &src1[y*srcstride]; |
148 | 0 | const int16_t* in2 = &src2[y*srcstride]; |
149 | 0 | uint8_t* out = &dst[y*dststride]; |
150 | |
|
151 | 0 | for (int x=0;x<width;x+=2) { |
152 | 0 | out[0] = Clip1_8bit((in1[0] + in2[0] + offset8bit)>>shift8bit); |
153 | 0 | out[1] = Clip1_8bit((in1[1] + in2[1] + offset8bit)>>shift8bit); |
154 | 0 | out+=2; in1+=2; in2+=2; |
155 | 0 | } |
156 | 0 | } |
157 | 0 | } |
158 | 0 | } |
159 | | |
160 | | |
161 | | |
162 | | |
163 | | |
164 | | void put_unweighted_pred_16_fallback(uint16_t *dst, ptrdiff_t dststride, |
165 | | const int16_t *src, ptrdiff_t srcstride, |
166 | | int width, int height, int bit_depth) |
167 | 0 | { |
168 | 0 | int shift1 = 14-bit_depth; |
169 | 0 | int offset1 = 0; |
170 | 0 | if (shift1>0) { offset1 = 1<<(shift1-1); } |
171 | |
|
172 | 0 | assert((width&1)==0); |
173 | | |
174 | 0 | for (int y=0;y<height;y++) { |
175 | 0 | const int16_t* in = &src[y*srcstride]; |
176 | 0 | uint16_t* out = &dst[y*dststride]; |
177 | |
|
178 | 0 | for (int x=0;x<width;x+=2) { |
179 | 0 | out[0] = Clip_BitDepth((in[0] + offset1)>>shift1, bit_depth); |
180 | 0 | out[1] = Clip_BitDepth((in[1] + offset1)>>shift1, bit_depth); |
181 | 0 | out+=2; in+=2; |
182 | 0 | } |
183 | 0 | } |
184 | 0 | } |
185 | | |
186 | | #include <stdlib.h> |
187 | | |
188 | | void put_weighted_pred_16_fallback(uint16_t *dst, ptrdiff_t dststride, |
189 | | const int16_t *src, ptrdiff_t srcstride, |
190 | | int width, int height, |
191 | | int w,int o,int log2WD, int bit_depth) |
192 | 0 | { |
193 | 0 | assert(log2WD>=1); // TODO |
194 | | |
195 | 0 | const int rnd = (1<<(log2WD-1)); |
196 | |
|
197 | 0 | for (int y=0;y<height;y++) { |
198 | 0 | const int16_t* in = &src[y*srcstride]; |
199 | 0 | uint16_t* out = &dst[y*dststride]; |
200 | |
|
201 | 0 | for (int x=0;x<width;x++) { |
202 | 0 | out[0] = Clip_BitDepth(((in[0]*w + rnd)>>log2WD) + o, bit_depth); |
203 | 0 | out++; in++; |
204 | 0 | } |
205 | 0 | } |
206 | 0 | } |
207 | | |
208 | | void put_weighted_bipred_16_fallback(uint16_t *dst, ptrdiff_t dststride, |
209 | | const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride, |
210 | | int width, int height, |
211 | | int w1,int o1, int w2,int o2, int log2WD, int bit_depth) |
212 | 0 | { |
213 | 0 | assert(log2WD>=1); // TODO |
214 | | |
215 | 0 | const int rnd = ((o1+o2+1) << log2WD); |
216 | |
|
217 | 0 | for (int y=0;y<height;y++) { |
218 | 0 | const int16_t* in1 = &src1[y*srcstride]; |
219 | 0 | const int16_t* in2 = &src2[y*srcstride]; |
220 | 0 | uint16_t* out = &dst[y*dststride]; |
221 | |
|
222 | 0 | for (int x=0;x<width;x++) { |
223 | 0 | out[0] = Clip_BitDepth((in1[0]*w1 + in2[0]*w2 + rnd)>>(log2WD+1), bit_depth); |
224 | 0 | out++; in1++; in2++; |
225 | 0 | } |
226 | 0 | } |
227 | 0 | } |
228 | | |
229 | | |
230 | | void put_weighted_pred_avg_16_fallback(uint16_t *dst, ptrdiff_t dststride, |
231 | | const int16_t *src1, const int16_t *src2, |
232 | | ptrdiff_t srcstride, int width, |
233 | | int height, int bit_depth) |
234 | 0 | { |
235 | 0 | int shift2 = 15-bit_depth; |
236 | 0 | int offset2 = 1<<(shift2-1); |
237 | |
|
238 | 0 | assert((width&1)==0); |
239 | | |
240 | 0 | for (int y=0;y<height;y++) { |
241 | 0 | const int16_t* in1 = &src1[y*srcstride]; |
242 | 0 | const int16_t* in2 = &src2[y*srcstride]; |
243 | 0 | uint16_t* out = &dst[y*dststride]; |
244 | |
|
245 | 0 | for (int x=0;x<width;x+=2) { |
246 | 0 | out[0] = Clip_BitDepth((in1[0] + in2[0] + offset2)>>shift2, bit_depth); |
247 | 0 | out[1] = Clip_BitDepth((in1[1] + in2[1] + offset2)>>shift2, bit_depth); |
248 | 0 | out+=2; in1+=2; in2+=2; |
249 | 0 | } |
250 | 0 | } |
251 | 0 | } |
252 | | |
253 | | |
254 | | |
255 | | |
256 | | |
/*
 * Full-sample copy of 8-bit pixels into the 16-bit intermediate buffer.
 * Every pixel is scaled by a left shift of 6.
 *
 * out/out_stride  destination samples and stride in samples
 * src/src_stride  source pixels and stride in pixels
 * width, height   size of the block
 * mx, my, mcbuffer  not used by this function
 */
void put_epel_8_fallback(int16_t *out, ptrdiff_t out_stride,
                         const uint8_t *src, ptrdiff_t src_stride,
                         int width, int height,
                         int mx, int my, int16_t* mcbuffer)
{
  const int upshift = 6;

  for (int row=0; row<height; row++) {
    const uint8_t* srcRow = src + row*src_stride;
    int16_t*       dstRow = out + row*out_stride;

    for (int col=0; col<width; col++) {
      dstRow[col] = srcRow[col] << upshift;
    }
  }
}
275 | | |
276 | | |
/*
 * Full-sample copy of 16-bit pixels into the 16-bit intermediate buffer.
 * Every pixel is scaled by a left shift of (14 - bit_depth).
 *
 * out/out_stride  destination samples and stride in samples
 * src/src_stride  source pixels and stride in pixels
 * width, height   size of the block
 * bit_depth       bit depth of the source pixels
 * mx, my, mcbuffer  not used by this function
 */
void put_epel_16_fallback(int16_t *out, ptrdiff_t out_stride,
                          const uint16_t *src, ptrdiff_t src_stride,
                          int width, int height,
                          int mx, int my, int16_t* mcbuffer, int bit_depth)
{
  const int upshift = 14 - bit_depth;

  for (int row=0; row<height; row++) {
    const uint16_t* srcRow = src + row*src_stride;
    int16_t*        dstRow = out + row*out_stride;

    for (int col=0; col<width; col++) {
      dstRow[col] = srcRow[col] << upshift;
    }
  }
}
295 | | |
296 | | |
297 | | template <class pixel_t> |
298 | | void put_epel_hv_fallback(int16_t *dst, ptrdiff_t dst_stride, |
299 | | const pixel_t *src, ptrdiff_t src_stride, |
300 | | int nPbWC, int nPbHC, |
301 | | int xFracC, int yFracC, int16_t* mcbuffer, int bit_depth) |
302 | 0 | { |
303 | 0 | const int shift1 = bit_depth-8; |
304 | 0 | const int shift2 = 6; |
305 | | //const int shift3 = 6; |
306 | |
|
307 | 0 | int extra_left = 1; |
308 | 0 | int extra_top = 1; |
309 | | // int extra_right = 2; |
310 | 0 | int extra_bottom= 2; |
311 | | |
312 | |
|
313 | 0 | int nPbH_extra = extra_top + nPbHC + extra_bottom; |
314 | |
|
315 | 0 | int16_t* tmp2buf = (int16_t*)alloca( nPbWC * nPbH_extra * sizeof(int16_t) ); |
316 | | |
317 | | /* |
318 | | int nPbW_extra = extra_left + nPbWC + extra_right; |
319 | | |
320 | | |
321 | | printf("x,y FracC: %d/%d\n",xFracC,yFracC); |
322 | | |
323 | | printf("---IN---\n"); |
324 | | |
325 | | for (int y=-extra_top;y<nPbHC+extra_bottom;y++) { |
326 | | uint8_t* p = &src[y*src_stride -extra_left]; |
327 | | |
328 | | for (int x=-extra_left;x<nPbWC+extra_right;x++) { |
329 | | printf("%05d ",*p << 6); |
330 | | p++; |
331 | | } |
332 | | printf("\n"); |
333 | | } |
334 | | */ |
335 | | |
336 | | |
337 | | // H-filters |
338 | |
|
339 | 0 | logtrace(LogMotion,"---H---\n"); |
340 | | //printf("---H---(%d)\n",xFracC); |
341 | |
|
342 | 0 | for (int y=-extra_top;y<nPbHC+extra_bottom;y++) { |
343 | 0 | const pixel_t* p = &src[y*src_stride - extra_left]; |
344 | |
|
345 | 0 | for (int x=0;x<nPbWC;x++) { |
346 | 0 | int16_t v; |
347 | 0 | switch (xFracC) { |
348 | 0 | case 0: v = p[1]; break; |
349 | 0 | case 1: v = (-2*p[0]+58*p[1]+10*p[2]-2*p[3])>>shift1; break; |
350 | 0 | case 2: v = (-4*p[0]+54*p[1]+16*p[2]-2*p[3])>>shift1; break; |
351 | 0 | case 3: v = (-6*p[0]+46*p[1]+28*p[2]-4*p[3])>>shift1; break; |
352 | 0 | case 4: v = (-4*p[0]+36*p[1]+36*p[2]-4*p[3])>>shift1; break; |
353 | 0 | case 5: v = (-4*p[0]+28*p[1]+46*p[2]-6*p[3])>>shift1; break; |
354 | 0 | case 6: v = (-2*p[0]+16*p[1]+54*p[2]-4*p[3])>>shift1; break; |
355 | 0 | default: |
356 | 0 | case 7: v = (-2*p[0]+10*p[1]+58*p[2]-2*p[3])>>shift1; break; |
357 | 0 | } |
358 | | |
359 | | //printf("%d %d %d %d -> %d\n",p[0],p[1],p[2],p[3],v); |
360 | | |
361 | 0 | tmp2buf[y+extra_top + x*nPbH_extra] = v; |
362 | 0 | p++; |
363 | | |
364 | | //printf("%05d ",tmp2buf[y+extra_top + x*nPbH_extra]); |
365 | 0 | } |
366 | | //printf("\n"); |
367 | 0 | } |
368 | | |
369 | | // V-filters |
370 | | |
371 | 0 | int vshift = (xFracC==0 ? shift1 : shift2); |
372 | |
|
373 | 0 | for (int x=0;x<nPbWC;x++) { |
374 | 0 | int16_t* p = &tmp2buf[x*nPbH_extra]; |
375 | |
|
376 | 0 | for (int y=0;y<nPbHC;y++) { |
377 | 0 | int16_t v; |
378 | | //logtrace(LogMotion,"%x %x %x %x %x %x %x\n",p[0],p[1],p[2],p[3],p[4],p[5],p[6]); |
379 | |
|
380 | 0 | switch (yFracC) { |
381 | 0 | case 0: v = p[1]; break; |
382 | 0 | case 1: v = (-2*p[0]+58*p[1]+10*p[2]-2*p[3])>>vshift; break; |
383 | 0 | case 2: v = (-4*p[0]+54*p[1]+16*p[2]-2*p[3])>>vshift; break; |
384 | 0 | case 3: v = (-6*p[0]+46*p[1]+28*p[2]-4*p[3])>>vshift; break; |
385 | 0 | case 4: v = (-4*p[0]+36*p[1]+36*p[2]-4*p[3])>>vshift; break; |
386 | 0 | case 5: v = (-4*p[0]+28*p[1]+46*p[2]-6*p[3])>>vshift; break; |
387 | 0 | case 6: v = (-2*p[0]+16*p[1]+54*p[2]-4*p[3])>>vshift; break; |
388 | 0 | default: |
389 | 0 | case 7: v = (-2*p[0]+10*p[1]+58*p[2]-2*p[3])>>vshift; break; |
390 | 0 | } |
391 | | |
392 | 0 | dst[x + y*dst_stride] = v; |
393 | 0 | p++; |
394 | 0 | } |
395 | |
|
396 | 0 | } |
397 | | |
398 | | /* |
399 | | printf("---V---\n"); |
400 | | for (int y=0;y<nPbHC;y++) { |
401 | | for (int x=0;x<nPbWC;x++) { |
402 | | printf("%05d ",dst[x+y*dst_stride]); |
403 | | } |
404 | | printf("\n"); |
405 | | } |
406 | | */ |
407 | 0 | } Unexecuted instantiation: void put_epel_hv_fallback<unsigned char>(short*, long, unsigned char const*, long, int, int, int, int, short*, int) Unexecuted instantiation: void put_epel_hv_fallback<unsigned short>(short*, long, unsigned short const*, long, int, int, int, int, short*, int) |
408 | | |
409 | | |
410 | | template |
411 | | void put_epel_hv_fallback<uint8_t>(int16_t *dst, ptrdiff_t dst_stride, |
412 | | const uint8_t *src, ptrdiff_t src_stride, |
413 | | int nPbWC, int nPbHC, |
414 | | int xFracC, int yFracC, int16_t* mcbuffer, int bit_depth); |
415 | | template |
416 | | void put_epel_hv_fallback<uint16_t>(int16_t *dst, ptrdiff_t dst_stride, |
417 | | const uint16_t *src, ptrdiff_t src_stride, |
418 | | int nPbWC, int nPbHC, |
419 | | int xFracC, int yFracC, int16_t* mcbuffer, int bit_depth); |
420 | | |
421 | | |
422 | | |
/*
 * Full-sample position: straight copy of 8-bit pixels into the 16-bit
 * intermediate buffer, each pixel scaled by a left shift of 6.
 *
 * out/out_stride  destination samples and stride in samples
 * src/srcstride   source pixels and stride in pixels
 * nPbW, nPbH      block width and height
 * mcbuffer        not used by this function
 *
 * The earlier version was unrolled four samples at a time, which read and
 * wrote up to three elements past the end of each row whenever nPbW was
 * not a multiple of 4. The original author already noted that the unrolled
 * form was not faster, so a plain per-sample loop is used; it handles any
 * width and produces identical output for multiples of 4.
 */
void put_qpel_0_0_fallback(int16_t *out, ptrdiff_t out_stride,
                           const uint8_t *src, ptrdiff_t srcstride,
                           int nPbW, int nPbH, int16_t* mcbuffer)
{
  const int shift2 = 6;

  // straight copy

  for (int y=0;y<nPbH;y++) {
    const uint8_t* p = src + srcstride*y;
    int16_t* o = out + out_stride*y;

    for (int x=0;x<nPbW;x++) {
      o[x] = p[x] << shift2;
    }
  }
}
454 | | |
455 | | |
/*
 * Full-sample position: straight copy of 16-bit pixels into the 16-bit
 * intermediate buffer, each pixel scaled by a left shift of
 * (14 - bit_depth).
 *
 * out/out_stride  destination samples and stride in samples
 * src/srcstride   source pixels and stride in pixels
 * nPbW, nPbH      block width and height
 * mcbuffer        not used by this function
 * bit_depth       bit depth of the source pixels
 */
void put_qpel_0_0_fallback_16(int16_t *out, ptrdiff_t out_stride,
                              const uint16_t *src, ptrdiff_t srcstride,
                              int nPbW, int nPbH, int16_t* mcbuffer, int bit_depth)
{
  const int upshift = 14-bit_depth;

  for (int row=0; row<nPbH; row++) {
    const uint16_t* srcRow = src + srcstride*row;
    int16_t*        dstRow = out + out_stride*row;

    for (int col=0; col<nPbW; col++) {
      dstRow[col] = srcRow[col] << upshift;
    }
  }
}
475 | | |
476 | | |
477 | | |
// Number of additional samples needed before and after the block for each
// fractional position 0..3 (the filters for positions 1 and 3 use 7 taps,
// the one for position 2 uses 8). The tables are only ever read, so they
// are const.
static const int extra_before[4] = { 0,3,3,2 };
static const int extra_after [4] = { 0,3,4,4 };
480 | | |
481 | | template <class pixel_t> |
482 | | void put_qpel_fallback(int16_t *out, ptrdiff_t out_stride, |
483 | | const pixel_t *src, ptrdiff_t srcstride, |
484 | | int nPbW, int nPbH, int16_t* mcbuffer, |
485 | | int xFracL, int yFracL, int bit_depth) |
486 | 0 | { |
487 | 0 | int extra_left = extra_before[xFracL]; |
488 | | //int extra_right = extra_after [xFracL]; |
489 | 0 | int extra_top = extra_before[yFracL]; |
490 | 0 | int extra_bottom = extra_after [yFracL]; |
491 | | |
492 | | //int nPbW_extra = extra_left + nPbW + extra_right; |
493 | 0 | int nPbH_extra = extra_top + nPbH + extra_bottom; |
494 | |
|
495 | 0 | const int shift1 = bit_depth-8; |
496 | 0 | const int shift2 = 6; |
497 | | |
498 | | |
499 | | // H-filters |
500 | |
|
501 | 0 | switch (xFracL) { |
502 | 0 | case 0: |
503 | 0 | for (int y=-extra_top;y<nPbH+extra_bottom;y++) { |
504 | 0 | const pixel_t* p = src + srcstride*y - extra_left; |
505 | 0 | int16_t* o = &mcbuffer[y+extra_top]; |
506 | |
|
507 | 0 | for (int x=0;x<nPbW;x++) { |
508 | 0 | *o = *p; |
509 | 0 | o += nPbH_extra; |
510 | 0 | p++; |
511 | 0 | } |
512 | 0 | } |
513 | 0 | break; |
514 | 0 | case 1: |
515 | 0 | for (int y=-extra_top;y<nPbH+extra_bottom;y++) { |
516 | 0 | const pixel_t* p = src + srcstride*y - extra_left; |
517 | 0 | int16_t* o = &mcbuffer[y+extra_top]; |
518 | |
|
519 | 0 | for (int x=0;x<nPbW;x++) { |
520 | 0 | *o = (-p[0]+4*p[1]-10*p[2]+58*p[3]+17*p[4] -5*p[5] +p[6])>>shift1; |
521 | 0 | o += nPbH_extra; |
522 | 0 | p++; |
523 | 0 | } |
524 | 0 | } |
525 | 0 | break; |
526 | 0 | case 2: |
527 | 0 | for (int y=-extra_top;y<nPbH+extra_bottom;y++) { |
528 | 0 | const pixel_t* p = src + srcstride*y - extra_left; |
529 | 0 | int16_t* o = &mcbuffer[y+extra_top]; |
530 | |
|
531 | 0 | for (int x=0;x<nPbW;x++) { |
532 | 0 | *o = (-p[0]+4*p[1]-11*p[2]+40*p[3]+40*p[4]-11*p[5]+4*p[6]-p[7])>>shift1; |
533 | 0 | o += nPbH_extra; |
534 | 0 | p++; |
535 | 0 | } |
536 | 0 | } |
537 | 0 | break; |
538 | 0 | case 3: |
539 | 0 | for (int y=-extra_top;y<nPbH+extra_bottom;y++) { |
540 | 0 | const pixel_t* p = src + srcstride*y - extra_left; |
541 | 0 | int16_t* o = &mcbuffer[y+extra_top]; |
542 | |
|
543 | 0 | for (int x=0;x<nPbW;x++) { |
544 | 0 | *o = ( p[0]-5*p[1]+17*p[2]+58*p[3]-10*p[4] +4*p[5] -p[6])>>shift1; |
545 | 0 | o += nPbH_extra; |
546 | 0 | p++; |
547 | 0 | } |
548 | 0 | } |
549 | 0 | break; |
550 | 0 | } |
551 | | |
552 | | |
553 | 0 | logtrace(LogMotion,"---H---\n"); |
554 | |
|
555 | 0 | for (int y=-extra_top;y<nPbH+extra_bottom;y++) { |
556 | 0 | for (int x=0;x<nPbW;x++) { |
557 | 0 | logtrace(LogMotion,"%04x ",mcbuffer[y+extra_top + x*nPbH_extra]); |
558 | 0 | } |
559 | 0 | logtrace(LogMotion,"\n"); |
560 | 0 | } |
561 | | |
562 | | // V-filters |
563 | |
|
564 | 0 | int vshift = (xFracL==0 ? shift1 : shift2); |
565 | |
|
566 | 0 | switch (yFracL) { |
567 | 0 | case 0: |
568 | 0 | for (int x=0;x<nPbW;x++) { |
569 | 0 | const int16_t* p = &mcbuffer[x*nPbH_extra]; |
570 | 0 | int16_t* o = &out[x]; |
571 | |
|
572 | 0 | for (int y=0;y<nPbH;y++) { |
573 | 0 | *o = *p; |
574 | 0 | o+=out_stride; |
575 | 0 | p++; |
576 | 0 | } |
577 | 0 | } |
578 | 0 | break; |
579 | 0 | case 1: |
580 | 0 | for (int x=0;x<nPbW;x++) { |
581 | 0 | const int16_t* p = &mcbuffer[x*nPbH_extra]; |
582 | 0 | int16_t* o = &out[x]; |
583 | |
|
584 | 0 | for (int y=0;y<nPbH;y++) { |
585 | 0 | *o = (-p[0]+4*p[1]-10*p[2]+58*p[3]+17*p[4] -5*p[5] +p[6])>>vshift; |
586 | 0 | o+=out_stride; |
587 | 0 | p++; |
588 | 0 | } |
589 | 0 | } |
590 | 0 | break; |
591 | 0 | case 2: |
592 | 0 | for (int x=0;x<nPbW;x++) { |
593 | 0 | const int16_t* p = &mcbuffer[x*nPbH_extra]; |
594 | 0 | int16_t* o = &out[x]; |
595 | |
|
596 | 0 | for (int y=0;y<nPbH;y++) { |
597 | 0 | *o = (-p[0]+4*p[1]-11*p[2]+40*p[3]+40*p[4]-11*p[5]+4*p[6]-p[7])>>vshift; |
598 | 0 | o+=out_stride; |
599 | 0 | p++; |
600 | 0 | } |
601 | 0 | } |
602 | 0 | break; |
603 | 0 | case 3: |
604 | 0 | for (int x=0;x<nPbW;x++) { |
605 | 0 | const int16_t* p = &mcbuffer[x*nPbH_extra]; |
606 | 0 | int16_t* o = &out[x]; |
607 | |
|
608 | 0 | for (int y=0;y<nPbH;y++) { |
609 | 0 | *o = ( p[0]-5*p[1]+17*p[2]+58*p[3]-10*p[4] +4*p[5] -p[6])>>vshift; |
610 | 0 | o+=out_stride; |
611 | 0 | p++; |
612 | 0 | } |
613 | 0 | } |
614 | 0 | break; |
615 | 0 | } |
616 | | |
617 | | |
618 | 0 | logtrace(LogMotion,"---V---\n"); |
619 | 0 | for (int y=0;y<nPbH;y++) { |
620 | 0 | for (int x=0;x<nPbW;x++) { |
621 | 0 | logtrace(LogMotion,"%04x ",out[x+y*out_stride]); |
622 | 0 | } |
623 | 0 | logtrace(LogMotion,"\n"); |
624 | 0 | } |
625 | 0 | } Unexecuted instantiation: void put_qpel_fallback<unsigned char>(short*, long, unsigned char const*, long, int, int, short*, int, int, int) Unexecuted instantiation: void put_qpel_fallback<unsigned short>(short*, long, unsigned short const*, long, int, int, short*, int, int, int) |
626 | | |
627 | | |
628 | | |
629 | | #define QPEL(x,y) void put_qpel_ ## x ## _ ## y ## _fallback(int16_t *out, ptrdiff_t out_stride, \ |
630 | | const uint8_t *src, ptrdiff_t srcstride, \ |
631 | | int nPbW, int nPbH, int16_t* mcbuffer) \ |
632 | 0 | { put_qpel_fallback(out,out_stride, src,srcstride, nPbW,nPbH,mcbuffer,x,y, 8 ); } Unexecuted instantiation: put_qpel_0_1_fallback(short*, long, unsigned char const*, long, int, int, short*) Unexecuted instantiation: put_qpel_0_2_fallback(short*, long, unsigned char const*, long, int, int, short*) Unexecuted instantiation: put_qpel_0_3_fallback(short*, long, unsigned char const*, long, int, int, short*) Unexecuted instantiation: put_qpel_1_0_fallback(short*, long, unsigned char const*, long, int, int, short*) Unexecuted instantiation: put_qpel_1_1_fallback(short*, long, unsigned char const*, long, int, int, short*) Unexecuted instantiation: put_qpel_1_2_fallback(short*, long, unsigned char const*, long, int, int, short*) Unexecuted instantiation: put_qpel_1_3_fallback(short*, long, unsigned char const*, long, int, int, short*) Unexecuted instantiation: put_qpel_2_0_fallback(short*, long, unsigned char const*, long, int, int, short*) Unexecuted instantiation: put_qpel_2_1_fallback(short*, long, unsigned char const*, long, int, int, short*) Unexecuted instantiation: put_qpel_2_2_fallback(short*, long, unsigned char const*, long, int, int, short*) Unexecuted instantiation: put_qpel_2_3_fallback(short*, long, unsigned char const*, long, int, int, short*) Unexecuted instantiation: put_qpel_3_0_fallback(short*, long, unsigned char const*, long, int, int, short*) Unexecuted instantiation: put_qpel_3_1_fallback(short*, long, unsigned char const*, long, int, int, short*) Unexecuted instantiation: put_qpel_3_2_fallback(short*, long, unsigned char const*, long, int, int, short*) Unexecuted instantiation: put_qpel_3_3_fallback(short*, long, unsigned char const*, long, int, int, short*) |
633 | | |
634 | | |
635 | | #define QPEL16(x,y) void put_qpel_ ## x ## _ ## y ## _fallback_16(int16_t *out, ptrdiff_t out_stride, \ |
636 | | const uint16_t *src, ptrdiff_t srcstride, \ |
637 | 0 | int nPbW, int nPbH, int16_t* mcbuffer, int bit_depth) \ |
638 | 0 | { put_qpel_fallback(out,out_stride, src,srcstride, nPbW,nPbH,mcbuffer,x,y, bit_depth ); } Unexecuted instantiation: put_qpel_0_1_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int) Unexecuted instantiation: put_qpel_0_2_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int) Unexecuted instantiation: put_qpel_0_3_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int) Unexecuted instantiation: put_qpel_1_0_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int) Unexecuted instantiation: put_qpel_1_1_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int) Unexecuted instantiation: put_qpel_1_2_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int) Unexecuted instantiation: put_qpel_1_3_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int) Unexecuted instantiation: put_qpel_2_0_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int) Unexecuted instantiation: put_qpel_2_1_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int) Unexecuted instantiation: put_qpel_2_2_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int) Unexecuted instantiation: put_qpel_2_3_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int) Unexecuted instantiation: put_qpel_3_0_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int) Unexecuted instantiation: put_qpel_3_1_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int) Unexecuted instantiation: put_qpel_3_2_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int) Unexecuted instantiation: put_qpel_3_3_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int) |
639 | | |
640 | | /* */ QPEL(0,1) QPEL(0,2) QPEL(0,3) |
641 | | QPEL(1,0) QPEL(1,1) QPEL(1,2) QPEL(1,3) |
642 | | QPEL(2,0) QPEL(2,1) QPEL(2,2) QPEL(2,3) |
643 | | QPEL(3,0) QPEL(3,1) QPEL(3,2) QPEL(3,3) |
644 | | |
645 | | /* */ QPEL16(0,1) QPEL16(0,2) QPEL16(0,3) |
646 | | QPEL16(1,0) QPEL16(1,1) QPEL16(1,2) QPEL16(1,3) |
647 | | QPEL16(2,0) QPEL16(2,1) QPEL16(2,2) QPEL16(2,3) |
648 | | QPEL16(3,0) QPEL16(3,1) QPEL16(3,2) QPEL16(3,3) |