/work/libde265/libde265/fallback-motion.cc
Line | Count | Source |
1 | | /* |
2 | | * H.265 video codec. |
3 | | * Copyright (c) 2013-2014 struktur AG, Dirk Farin <farin@struktur.de> |
4 | | * |
5 | | * This file is part of libde265. |
6 | | * |
7 | | * libde265 is free software: you can redistribute it and/or modify |
8 | | * it under the terms of the GNU Lesser General Public License as |
9 | | * published by the Free Software Foundation, either version 3 of |
10 | | * the License, or (at your option) any later version. |
11 | | * |
12 | | * libde265 is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | | * GNU Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public License |
18 | | * along with libde265. If not, see <http://www.gnu.org/licenses/>. |
19 | | */ |
20 | | |
21 | | #include "fallback-motion.h" |
22 | | #include "util.h" |
23 | | |
24 | | #if defined(_MSC_VER) || defined(__MINGW32__) |
25 | | # include <malloc.h> |
26 | | #elif defined(HAVE_ALLOCA_H) |
27 | | # include <alloca.h> |
28 | | #endif |
29 | | |
30 | | #include <assert.h> |
31 | | |
32 | | |
33 | | void put_unweighted_pred_8_fallback(uint8_t *dst, ptrdiff_t dststride, |
34 | | const int16_t *src, ptrdiff_t srcstride, |
35 | | int width, int height) |
36 | 0 | { |
37 | 0 | int offset8bit = 32; |
38 | 0 | int shift8bit = 6; |
39 | |
|
40 | 0 | assert((width&1)==0); |
41 | | |
42 | 0 | for (int y=0;y<height;y++) { |
43 | 0 | const int16_t* in = &src[y*srcstride]; |
44 | 0 | uint8_t* out = &dst[y*dststride]; |
45 | |
|
46 | 0 | for (int x=0;x<width;x+=2) { |
47 | 0 | out[0] = Clip1_8bit((in[0] + offset8bit)>>shift8bit); |
48 | 0 | out[1] = Clip1_8bit((in[1] + offset8bit)>>shift8bit); |
49 | 0 | out+=2; in+=2; |
50 | 0 | } |
51 | 0 | } |
52 | 0 | } |
53 | | |
54 | | |
55 | | void put_weighted_pred_8_fallback(uint8_t *dst, ptrdiff_t dststride, |
56 | | const int16_t *src, ptrdiff_t srcstride, |
57 | | int width, int height, |
58 | | int w,int o,int log2WD) |
59 | 0 | { |
60 | 0 | assert(log2WD>=1); // TODO |
61 | | |
62 | 0 | const int rnd = (1<<(log2WD-1)); |
63 | |
|
64 | 0 | for (int y=0;y<height;y++) { |
65 | 0 | const int16_t* in = &src[y*srcstride]; |
66 | 0 | uint8_t* out = &dst[y*dststride]; |
67 | |
|
68 | 0 | for (int x=0;x<width;x++) { |
69 | 0 | out[0] = Clip1_8bit(((in[0]*w + rnd)>>log2WD) + o); |
70 | 0 | out++; in++; |
71 | 0 | } |
72 | 0 | } |
73 | 0 | } |
74 | | |
75 | | void put_weighted_bipred_8_fallback(uint8_t *dst, ptrdiff_t dststride, |
76 | | const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride, |
77 | | int width, int height, |
78 | | int w1,int o1, int w2,int o2, int log2WD) |
79 | 0 | { |
80 | 0 | assert(log2WD>=1); // TODO |
81 | | |
82 | 0 | const int rnd = static_cast<int>(static_cast<unsigned int>(o1+o2+1) << log2WD); |
83 | |
|
84 | 0 | for (int y=0;y<height;y++) { |
85 | 0 | const int16_t* in1 = &src1[y*srcstride]; |
86 | 0 | const int16_t* in2 = &src2[y*srcstride]; |
87 | 0 | uint8_t* out = &dst[y*dststride]; |
88 | |
|
89 | 0 | for (int x=0;x<width;x++) { |
90 | 0 | out[0] = Clip1_8bit((in1[0]*w1 + in2[0]*w2 + rnd)>>(log2WD+1)); |
91 | 0 | out++; in1++; in2++; |
92 | 0 | } |
93 | 0 | } |
94 | 0 | } |
95 | | |
96 | | |
97 | | void put_weighted_pred_avg_8_fallback(uint8_t *dst, ptrdiff_t dststride, |
98 | | const int16_t *src1, const int16_t *src2, |
99 | | ptrdiff_t srcstride, int width, |
100 | | int height) |
101 | 0 | { |
102 | 0 | int offset8bit = 64; |
103 | 0 | int shift8bit = 7; |
104 | |
|
105 | 0 | assert((width&1)==0); |
106 | | |
107 | | // I had a special case for 8-pixel parallel, unrolled code, |
108 | | // but I did not see any speedup. |
109 | | |
110 | | #if 0 |
111 | | for (int y=0;y<height;y++) { |
112 | | int16_t* in1 = &src1[y*srcstride]; |
113 | | int16_t* in2 = &src2[y*srcstride]; |
114 | | uint8_t* out = &dst[y*dststride]; |
115 | | |
116 | | for (int x=0;x<width;x++) { |
117 | | out[0] = Clip1_8bit((in1[0] + in2[0] + offset8bit)>>shift8bit); |
118 | | out++; in1++; in2++; |
119 | | } |
120 | | } |
121 | | #endif |
122 | | |
123 | | #if 0 |
124 | | if ((width&7)==0) { |
125 | | for (int y=0;y<height;y++) { |
126 | | int16_t* in1 = &src1[y*srcstride]; |
127 | | int16_t* in2 = &src2[y*srcstride]; |
128 | | uint8_t* out = &dst[y*dststride]; |
129 | | |
130 | | for (int x=0;x<width;x+=8) { |
131 | | out[0] = Clip1_8bit((in1[0] + in2[0] + offset8bit)>>shift8bit); |
132 | | out[1] = Clip1_8bit((in1[1] + in2[1] + offset8bit)>>shift8bit); |
133 | | out[2] = Clip1_8bit((in1[2] + in2[2] + offset8bit)>>shift8bit); |
134 | | out[3] = Clip1_8bit((in1[3] + in2[3] + offset8bit)>>shift8bit); |
135 | | out[4] = Clip1_8bit((in1[4] + in2[4] + offset8bit)>>shift8bit); |
136 | | out[5] = Clip1_8bit((in1[5] + in2[5] + offset8bit)>>shift8bit); |
137 | | out[6] = Clip1_8bit((in1[6] + in2[6] + offset8bit)>>shift8bit); |
138 | | out[7] = Clip1_8bit((in1[7] + in2[7] + offset8bit)>>shift8bit); |
139 | | out+=8; in1+=8; in2+=8; |
140 | | } |
141 | | } |
142 | | } |
143 | | else |
144 | | #endif |
145 | 0 | { |
146 | 0 | for (int y=0;y<height;y++) { |
147 | 0 | const int16_t* in1 = &src1[y*srcstride]; |
148 | 0 | const int16_t* in2 = &src2[y*srcstride]; |
149 | 0 | uint8_t* out = &dst[y*dststride]; |
150 | |
|
151 | 0 | for (int x=0;x<width;x+=2) { |
152 | 0 | out[0] = Clip1_8bit((in1[0] + in2[0] + offset8bit)>>shift8bit); |
153 | 0 | out[1] = Clip1_8bit((in1[1] + in2[1] + offset8bit)>>shift8bit); |
154 | 0 | out+=2; in1+=2; in2+=2; |
155 | 0 | } |
156 | 0 | } |
157 | 0 | } |
158 | 0 | } |
159 | | |
160 | | |
161 | | |
162 | | |
163 | | |
164 | | void put_unweighted_pred_16_fallback(uint16_t *dst, ptrdiff_t dststride, |
165 | | const int16_t *src, ptrdiff_t srcstride, |
166 | | int width, int height, int bit_depth) |
167 | 0 | { |
168 | | // shift1 per HEVC v2 (10/2014) spec 8.5.3.3.4.2: Max(2, 14 - BitDepth). |
169 | | // The Max() was added with the Range Extensions in v2 to handle BitDepth up to 16; |
170 | | // the v1 (04/2013) formula was just (14 - BitDepth), valid only for BitDepth <= 14. |
171 | 0 | int shift1 = std::max(2, 14-bit_depth); |
172 | 0 | int offset1 = 1<<(shift1-1); |
173 | |
|
174 | 0 | assert((width&1)==0); |
175 | | |
176 | 0 | for (int y=0;y<height;y++) { |
177 | 0 | const int16_t* in = &src[y*srcstride]; |
178 | 0 | uint16_t* out = &dst[y*dststride]; |
179 | |
|
180 | 0 | for (int x=0;x<width;x+=2) { |
181 | 0 | out[0] = Clip_BitDepth((in[0] + offset1)>>shift1, bit_depth); |
182 | 0 | out[1] = Clip_BitDepth((in[1] + offset1)>>shift1, bit_depth); |
183 | 0 | out+=2; in+=2; |
184 | 0 | } |
185 | 0 | } |
186 | 0 | } |
187 | | |
188 | | #include <stdlib.h> |
189 | | |
190 | | void put_weighted_pred_16_fallback(uint16_t *dst, ptrdiff_t dststride, |
191 | | const int16_t *src, ptrdiff_t srcstride, |
192 | | int width, int height, |
193 | | int w,int o,int log2WD, int bit_depth) |
194 | 0 | { |
195 | 0 | assert(log2WD>=1); // TODO |
196 | | |
197 | 0 | const int rnd = (1<<(log2WD-1)); |
198 | |
|
199 | 0 | for (int y=0;y<height;y++) { |
200 | 0 | const int16_t* in = &src[y*srcstride]; |
201 | 0 | uint16_t* out = &dst[y*dststride]; |
202 | |
|
203 | 0 | for (int x=0;x<width;x++) { |
204 | 0 | out[0] = Clip_BitDepth(((in[0]*w + rnd)>>log2WD) + o, bit_depth); |
205 | 0 | out++; in++; |
206 | 0 | } |
207 | 0 | } |
208 | 0 | } |
209 | | |
210 | | void put_weighted_bipred_16_fallback(uint16_t *dst, ptrdiff_t dststride, |
211 | | const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride, |
212 | | int width, int height, |
213 | | int w1,int o1, int w2,int o2, int log2WD, int bit_depth) |
214 | 0 | { |
215 | 0 | assert(log2WD>=1); // TODO |
216 | | |
217 | 0 | const int rnd = static_cast<int>(static_cast<unsigned int>(o1+o2+1) << log2WD); |
218 | |
|
219 | 0 | for (int y=0;y<height;y++) { |
220 | 0 | const int16_t* in1 = &src1[y*srcstride]; |
221 | 0 | const int16_t* in2 = &src2[y*srcstride]; |
222 | 0 | uint16_t* out = &dst[y*dststride]; |
223 | |
|
224 | 0 | for (int x=0;x<width;x++) { |
225 | 0 | out[0] = Clip_BitDepth((in1[0]*w1 + in2[0]*w2 + rnd)>>(log2WD+1), bit_depth); |
226 | 0 | out++; in1++; in2++; |
227 | 0 | } |
228 | 0 | } |
229 | 0 | } |
230 | | |
231 | | |
232 | | void put_weighted_pred_avg_16_fallback(uint16_t *dst, ptrdiff_t dststride, |
233 | | const int16_t *src1, const int16_t *src2, |
234 | | ptrdiff_t srcstride, int width, |
235 | | int height, int bit_depth) |
236 | 0 | { |
237 | | // shift2 per HEVC v2 (10/2014) spec 8.5.3.3.4.2: Max(3, 15 - BitDepth). |
238 | | // The Max() was added with the Range Extensions in v2 to handle BitDepth up to 16; |
239 | | // the v1 (04/2013) formula was just (15 - BitDepth), valid only for BitDepth <= 14. |
240 | 0 | int shift2 = std::max(3, 15-bit_depth); |
241 | 0 | int offset2 = 1<<(shift2-1); |
242 | |
|
243 | 0 | assert((width&1)==0); |
244 | | |
245 | 0 | for (int y=0;y<height;y++) { |
246 | 0 | const int16_t* in1 = &src1[y*srcstride]; |
247 | 0 | const int16_t* in2 = &src2[y*srcstride]; |
248 | 0 | uint16_t* out = &dst[y*dststride]; |
249 | |
|
250 | 0 | for (int x=0;x<width;x+=2) { |
251 | 0 | out[0] = Clip_BitDepth((in1[0] + in2[0] + offset2)>>shift2, bit_depth); |
252 | 0 | out[1] = Clip_BitDepth((in1[1] + in2[1] + offset2)>>shift2, bit_depth); |
253 | 0 | out+=2; in1+=2; in2+=2; |
254 | 0 | } |
255 | 0 | } |
256 | 0 | } |
257 | | |
258 | | |
259 | | |
260 | | |
261 | | |
// Full-sample copy of an 8-bit block into the 16-bit intermediate format:
// every input pixel is multiplied by 64 (left shift by 6).
//
//   out, out_stride : output samples and the distance between output rows (in samples)
//   src, src_stride : input pixels and the distance between input rows (in pixels)
//   width, height   : block size
//   mx, my, mcbuffer: not used by this function
void put_epel_8_fallback(int16_t *out, ptrdiff_t out_stride,
                         const uint8_t *src, ptrdiff_t src_stride,
                         int width, int height,
                         int mx, int my, int16_t* mcbuffer)
{
  const int shift = 6;

  for (int row=0; row<height; row++) {
    const uint8_t* srcRow = src + row*src_stride;
    int16_t*       dstRow = out + row*out_stride;

    for (int col=0; col<width; col++) {
      dstRow[col] = srcRow[col] << shift;
    }
  }
}
280 | | |
281 | | |
// Full-sample copy of a high-bit-depth block into the 16-bit intermediate
// format: every input pixel is shifted left by Max(2, 14 - bit_depth).
//
//   out, out_stride : output samples and the distance between output rows (in samples)
//   src, src_stride : input pixels and the distance between input rows (in pixels)
//   width, height   : block size
//   mx, my, mcbuffer: not used by this function
//   bit_depth       : bit depth of the input pixels
void put_epel_16_fallback(int16_t *out, ptrdiff_t out_stride,
                          const uint16_t *src, ptrdiff_t src_stride,
                          int width, int height,
                          int mx, int my, int16_t* mcbuffer, int bit_depth)
{
  // HEVC v2 (10/2014), 8.5.3.3.3.3 (chroma): shift3 = Max(2, 14 - BitDepth).
  // The v1 (04/2013) text used plain (14 - BitDepth), which only works for
  // BitDepth <= 14; the Max() came with the Range Extensions (BitDepth up to 16).
  const int shift = std::max(2, 14 - bit_depth);

  for (int row=0; row<height; row++) {
    const uint16_t* srcRow = src + row*src_stride;
    int16_t*        dstRow = out + row*out_stride;

    for (int col=0; col<width; col++) {
      dstRow[col] = srcRow[col] << shift;
    }
  }
}
303 | | |
304 | | |
305 | | template <class pixel_t> |
306 | | void put_epel_hv_fallback(int16_t *dst, ptrdiff_t dst_stride, |
307 | | const pixel_t *src, ptrdiff_t src_stride, |
308 | | int nPbWC, int nPbHC, |
309 | | int xFracC, int yFracC, int16_t* mcbuffer, int bit_depth) |
310 | 0 | { |
311 | 0 | const int shift1 = bit_depth-8; |
312 | 0 | const int shift2 = 6; |
313 | | //const int shift3 = 6; |
314 | |
|
315 | 0 | int extra_left = 1; |
316 | 0 | int extra_top = 1; |
317 | | // int extra_right = 2; |
318 | 0 | int extra_bottom= 2; |
319 | | |
320 | |
|
321 | 0 | int nPbH_extra = extra_top + nPbHC + extra_bottom; |
322 | |
|
323 | 0 | int16_t* tmp2buf = (int16_t*)alloca( nPbWC * nPbH_extra * sizeof(int16_t) ); |
324 | | |
325 | | /* |
326 | | int nPbW_extra = extra_left + nPbWC + extra_right; |
327 | | |
328 | | |
329 | | printf("x,y FracC: %d/%d\n",xFracC,yFracC); |
330 | | |
331 | | printf("---IN---\n"); |
332 | | |
333 | | for (int y=-extra_top;y<nPbHC+extra_bottom;y++) { |
334 | | uint8_t* p = &src[y*src_stride -extra_left]; |
335 | | |
336 | | for (int x=-extra_left;x<nPbWC+extra_right;x++) { |
337 | | printf("%05d ",*p << 6); |
338 | | p++; |
339 | | } |
340 | | printf("\n"); |
341 | | } |
342 | | */ |
343 | | |
344 | | |
345 | | // H-filters |
346 | |
|
347 | 0 | logtrace(LogMotion,"---H---\n"); |
348 | | //printf("---H---(%d)\n",xFracC); |
349 | |
|
350 | 0 | for (int y=-extra_top;y<nPbHC+extra_bottom;y++) { |
351 | 0 | const pixel_t* p = &src[y*src_stride - extra_left]; |
352 | |
|
353 | 0 | for (int x=0;x<nPbWC;x++) { |
354 | 0 | int16_t v; |
355 | 0 | switch (xFracC) { |
356 | 0 | case 0: v = p[1]; break; |
357 | 0 | case 1: v = (-2*p[0]+58*p[1]+10*p[2]-2*p[3])>>shift1; break; |
358 | 0 | case 2: v = (-4*p[0]+54*p[1]+16*p[2]-2*p[3])>>shift1; break; |
359 | 0 | case 3: v = (-6*p[0]+46*p[1]+28*p[2]-4*p[3])>>shift1; break; |
360 | 0 | case 4: v = (-4*p[0]+36*p[1]+36*p[2]-4*p[3])>>shift1; break; |
361 | 0 | case 5: v = (-4*p[0]+28*p[1]+46*p[2]-6*p[3])>>shift1; break; |
362 | 0 | case 6: v = (-2*p[0]+16*p[1]+54*p[2]-4*p[3])>>shift1; break; |
363 | 0 | default: |
364 | 0 | case 7: v = (-2*p[0]+10*p[1]+58*p[2]-2*p[3])>>shift1; break; |
365 | 0 | } |
366 | | |
367 | | //printf("%d %d %d %d -> %d\n",p[0],p[1],p[2],p[3],v); |
368 | | |
369 | 0 | tmp2buf[y+extra_top + x*nPbH_extra] = v; |
370 | 0 | p++; |
371 | | |
372 | | //printf("%05d ",tmp2buf[y+extra_top + x*nPbH_extra]); |
373 | 0 | } |
374 | | //printf("\n"); |
375 | 0 | } |
376 | | |
377 | | // V-filters |
378 | | |
379 | 0 | int vshift = (xFracC==0 ? shift1 : shift2); |
380 | |
|
381 | 0 | for (int x=0;x<nPbWC;x++) { |
382 | 0 | int16_t* p = &tmp2buf[x*nPbH_extra]; |
383 | |
|
384 | 0 | for (int y=0;y<nPbHC;y++) { |
385 | 0 | int16_t v; |
386 | | //logtrace(LogMotion,"%x %x %x %x %x %x %x\n",p[0],p[1],p[2],p[3],p[4],p[5],p[6]); |
387 | |
|
388 | 0 | switch (yFracC) { |
389 | 0 | case 0: v = p[1]; break; |
390 | 0 | case 1: v = (-2*p[0]+58*p[1]+10*p[2]-2*p[3])>>vshift; break; |
391 | 0 | case 2: v = (-4*p[0]+54*p[1]+16*p[2]-2*p[3])>>vshift; break; |
392 | 0 | case 3: v = (-6*p[0]+46*p[1]+28*p[2]-4*p[3])>>vshift; break; |
393 | 0 | case 4: v = (-4*p[0]+36*p[1]+36*p[2]-4*p[3])>>vshift; break; |
394 | 0 | case 5: v = (-4*p[0]+28*p[1]+46*p[2]-6*p[3])>>vshift; break; |
395 | 0 | case 6: v = (-2*p[0]+16*p[1]+54*p[2]-4*p[3])>>vshift; break; |
396 | 0 | default: |
397 | 0 | case 7: v = (-2*p[0]+10*p[1]+58*p[2]-2*p[3])>>vshift; break; |
398 | 0 | } |
399 | | |
400 | 0 | dst[x + y*dst_stride] = v; |
401 | 0 | p++; |
402 | 0 | } |
403 | |
|
404 | 0 | } |
405 | | |
406 | | /* |
407 | | printf("---V---\n"); |
408 | | for (int y=0;y<nPbHC;y++) { |
409 | | for (int x=0;x<nPbWC;x++) { |
410 | | printf("%05d ",dst[x+y*dst_stride]); |
411 | | } |
412 | | printf("\n"); |
413 | | } |
414 | | */ |
415 | 0 | } Unexecuted instantiation: void put_epel_hv_fallback<unsigned char>(short*, long, unsigned char const*, long, int, int, int, int, short*, int) Unexecuted instantiation: void put_epel_hv_fallback<unsigned short>(short*, long, unsigned short const*, long, int, int, int, int, short*, int) |
416 | | |
417 | | |
418 | | template |
419 | | void put_epel_hv_fallback<uint8_t>(int16_t *dst, ptrdiff_t dst_stride, |
420 | | const uint8_t *src, ptrdiff_t src_stride, |
421 | | int nPbWC, int nPbHC, |
422 | | int xFracC, int yFracC, int16_t* mcbuffer, int bit_depth); |
423 | | template |
424 | | void put_epel_hv_fallback<uint16_t>(int16_t *dst, ptrdiff_t dst_stride, |
425 | | const uint16_t *src, ptrdiff_t src_stride, |
426 | | int nPbWC, int nPbHC, |
427 | | int xFracC, int yFracC, int16_t* mcbuffer, int bit_depth); |
428 | | |
429 | | |
430 | | |
// Full-sample copy of an 8-bit block into the 16-bit intermediate format:
// every input pixel is multiplied by 64 (left shift by 6).
// Pixels are handled in groups of four, so a whole number of groups is
// read and written per row.
//
//   out, out_stride : output samples and the distance between output rows (in samples)
//   src, srcstride  : input pixels and the distance between input rows (in pixels)
//   nPbW, nPbH      : block width and height
//   mcbuffer        : not used by this function
void put_qpel_0_0_fallback(int16_t *out, ptrdiff_t out_stride,
                           const uint8_t *src, ptrdiff_t srcstride,
                           int nPbW, int nPbH, int16_t* mcbuffer)
{
  const int shift = 6;

  for (int row=0; row<nPbH; row++) {
    const uint8_t* srcRow = src + srcstride*row;
    int16_t*       dstRow = out + out_stride*row;

    for (int col=0; col<nPbW; col+=4) {
      // load and scale four pixels first, then store them
      int16_t quad[4];

      for (int k=0; k<4; k++) {
        quad[k] = srcRow[col+k] << shift;
      }

      for (int k=0; k<4; k++) {
        dstRow[col+k] = quad[k];
      }
    }
  }
}
462 | | |
463 | | |
// Full-sample copy of a high-bit-depth block into the 16-bit intermediate
// format: every input pixel is shifted left by Max(2, 14 - bit_depth).
//
//   out, out_stride : output samples and the distance between output rows (in samples)
//   src, srcstride  : input pixels and the distance between input rows (in pixels)
//   nPbW, nPbH      : block width and height
//   mcbuffer        : not used by this function
//   bit_depth       : bit depth of the input pixels
void put_qpel_0_0_fallback_16(int16_t *out, ptrdiff_t out_stride,
                              const uint16_t *src, ptrdiff_t srcstride,
                              int nPbW, int nPbH, int16_t* mcbuffer, int bit_depth)
{
  // HEVC v2 (10/2014), 8.5.3.3.3.2 (luma): shift3 = Max(2, 14 - BitDepth).
  // The v1 (04/2013) text used plain (14 - BitDepth), which only works for
  // BitDepth <= 14; the Max() came with the Range Extensions (BitDepth up to 16).
  const int shift = std::max(2, 14-bit_depth);

  for (int row=0; row<nPbH; row++) {
    const uint16_t* srcRow = src + srcstride*row;
    int16_t*        dstRow = out + out_stride*row;

    for (int col=0; col<nPbW; col++) {
      dstRow[col] = srcRow[col] << shift;
    }
  }
}
486 | | |
487 | | |
488 | | |
// Number of additional samples the interpolation filter of put_qpel_fallback()
// reads before (left / above) and after (right / below) the block, indexed by
// the quarter-sample fraction 0..3. Fraction 0 needs none, fractions 1 and 3
// use a 7-tap filter and fraction 2 an 8-tap filter.
// The tables are read-only, hence const.
static const int extra_before[4] = { 0,3,3,2 };
static const int extra_after [4] = { 0,3,4,4 };
491 | | |
492 | | template <class pixel_t> |
493 | | void put_qpel_fallback(int16_t *out, ptrdiff_t out_stride, |
494 | | const pixel_t *src, ptrdiff_t srcstride, |
495 | | int nPbW, int nPbH, int16_t* mcbuffer, |
496 | | int xFracL, int yFracL, int bit_depth) |
497 | 0 | { |
498 | 0 | int extra_left = extra_before[xFracL]; |
499 | | //int extra_right = extra_after [xFracL]; |
500 | 0 | int extra_top = extra_before[yFracL]; |
501 | 0 | int extra_bottom = extra_after [yFracL]; |
502 | | |
503 | | //int nPbW_extra = extra_left + nPbW + extra_right; |
504 | 0 | int nPbH_extra = extra_top + nPbH + extra_bottom; |
505 | |
|
506 | 0 | const int shift1 = bit_depth-8; |
507 | 0 | const int shift2 = 6; |
508 | | |
509 | | |
510 | | // H-filters |
511 | |
|
512 | 0 | switch (xFracL) { |
513 | 0 | case 0: |
514 | 0 | for (int y=-extra_top;y<nPbH+extra_bottom;y++) { |
515 | 0 | const pixel_t* p = src + srcstride*y - extra_left; |
516 | 0 | int16_t* o = &mcbuffer[y+extra_top]; |
517 | |
|
518 | 0 | for (int x=0;x<nPbW;x++) { |
519 | 0 | *o = *p; |
520 | 0 | o += nPbH_extra; |
521 | 0 | p++; |
522 | 0 | } |
523 | 0 | } |
524 | 0 | break; |
525 | 0 | case 1: |
526 | 0 | for (int y=-extra_top;y<nPbH+extra_bottom;y++) { |
527 | 0 | const pixel_t* p = src + srcstride*y - extra_left; |
528 | 0 | int16_t* o = &mcbuffer[y+extra_top]; |
529 | |
|
530 | 0 | for (int x=0;x<nPbW;x++) { |
531 | 0 | *o = (-p[0]+4*p[1]-10*p[2]+58*p[3]+17*p[4] -5*p[5] +p[6])>>shift1; |
532 | 0 | o += nPbH_extra; |
533 | 0 | p++; |
534 | 0 | } |
535 | 0 | } |
536 | 0 | break; |
537 | 0 | case 2: |
538 | 0 | for (int y=-extra_top;y<nPbH+extra_bottom;y++) { |
539 | 0 | const pixel_t* p = src + srcstride*y - extra_left; |
540 | 0 | int16_t* o = &mcbuffer[y+extra_top]; |
541 | |
|
542 | 0 | for (int x=0;x<nPbW;x++) { |
543 | 0 | *o = (-p[0]+4*p[1]-11*p[2]+40*p[3]+40*p[4]-11*p[5]+4*p[6]-p[7])>>shift1; |
544 | 0 | o += nPbH_extra; |
545 | 0 | p++; |
546 | 0 | } |
547 | 0 | } |
548 | 0 | break; |
549 | 0 | case 3: |
550 | 0 | for (int y=-extra_top;y<nPbH+extra_bottom;y++) { |
551 | 0 | const pixel_t* p = src + srcstride*y - extra_left; |
552 | 0 | int16_t* o = &mcbuffer[y+extra_top]; |
553 | |
|
554 | 0 | for (int x=0;x<nPbW;x++) { |
555 | 0 | *o = ( p[0]-5*p[1]+17*p[2]+58*p[3]-10*p[4] +4*p[5] -p[6])>>shift1; |
556 | 0 | o += nPbH_extra; |
557 | 0 | p++; |
558 | 0 | } |
559 | 0 | } |
560 | 0 | break; |
561 | 0 | } |
562 | | |
563 | | |
564 | 0 | logtrace(LogMotion,"---H---\n"); |
565 | |
|
566 | 0 | for (int y=-extra_top;y<nPbH+extra_bottom;y++) { |
567 | 0 | for (int x=0;x<nPbW;x++) { |
568 | 0 | logtrace(LogMotion,"%04x ",mcbuffer[y+extra_top + x*nPbH_extra]); |
569 | 0 | } |
570 | 0 | logtrace(LogMotion,"\n"); |
571 | 0 | } |
572 | | |
573 | | // V-filters |
574 | |
|
575 | 0 | int vshift = (xFracL==0 ? shift1 : shift2); |
576 | |
|
577 | 0 | switch (yFracL) { |
578 | 0 | case 0: |
579 | 0 | for (int x=0;x<nPbW;x++) { |
580 | 0 | const int16_t* p = &mcbuffer[x*nPbH_extra]; |
581 | 0 | int16_t* o = &out[x]; |
582 | |
|
583 | 0 | for (int y=0;y<nPbH;y++) { |
584 | 0 | *o = *p; |
585 | 0 | o+=out_stride; |
586 | 0 | p++; |
587 | 0 | } |
588 | 0 | } |
589 | 0 | break; |
590 | 0 | case 1: |
591 | 0 | for (int x=0;x<nPbW;x++) { |
592 | 0 | const int16_t* p = &mcbuffer[x*nPbH_extra]; |
593 | 0 | int16_t* o = &out[x]; |
594 | |
|
595 | 0 | for (int y=0;y<nPbH;y++) { |
596 | 0 | *o = (-p[0]+4*p[1]-10*p[2]+58*p[3]+17*p[4] -5*p[5] +p[6])>>vshift; |
597 | 0 | o+=out_stride; |
598 | 0 | p++; |
599 | 0 | } |
600 | 0 | } |
601 | 0 | break; |
602 | 0 | case 2: |
603 | 0 | for (int x=0;x<nPbW;x++) { |
604 | 0 | const int16_t* p = &mcbuffer[x*nPbH_extra]; |
605 | 0 | int16_t* o = &out[x]; |
606 | |
|
607 | 0 | for (int y=0;y<nPbH;y++) { |
608 | 0 | *o = (-p[0]+4*p[1]-11*p[2]+40*p[3]+40*p[4]-11*p[5]+4*p[6]-p[7])>>vshift; |
609 | 0 | o+=out_stride; |
610 | 0 | p++; |
611 | 0 | } |
612 | 0 | } |
613 | 0 | break; |
614 | 0 | case 3: |
615 | 0 | for (int x=0;x<nPbW;x++) { |
616 | 0 | const int16_t* p = &mcbuffer[x*nPbH_extra]; |
617 | 0 | int16_t* o = &out[x]; |
618 | |
|
619 | 0 | for (int y=0;y<nPbH;y++) { |
620 | 0 | *o = ( p[0]-5*p[1]+17*p[2]+58*p[3]-10*p[4] +4*p[5] -p[6])>>vshift; |
621 | 0 | o+=out_stride; |
622 | 0 | p++; |
623 | 0 | } |
624 | 0 | } |
625 | 0 | break; |
626 | 0 | } |
627 | | |
628 | | |
629 | 0 | logtrace(LogMotion,"---V---\n"); |
630 | 0 | for (int y=0;y<nPbH;y++) { |
631 | 0 | for (int x=0;x<nPbW;x++) { |
632 | 0 | logtrace(LogMotion,"%04x ",out[x+y*out_stride]); |
633 | 0 | } |
634 | 0 | logtrace(LogMotion,"\n"); |
635 | 0 | } |
636 | 0 | } Unexecuted instantiation: void put_qpel_fallback<unsigned char>(short*, long, unsigned char const*, long, int, int, short*, int, int, int) Unexecuted instantiation: void put_qpel_fallback<unsigned short>(short*, long, unsigned short const*, long, int, int, short*, int, int, int) |
637 | | |
638 | | |
639 | | |
640 | | #define QPEL(x,y) void put_qpel_ ## x ## _ ## y ## _fallback(int16_t *out, ptrdiff_t out_stride, \ |
641 | | const uint8_t *src, ptrdiff_t srcstride, \ |
642 | | int nPbW, int nPbH, int16_t* mcbuffer) \ |
643 | 0 | { put_qpel_fallback(out,out_stride, src,srcstride, nPbW,nPbH,mcbuffer,x,y, 8 ); }Unexecuted instantiation: put_qpel_0_1_fallback(short*, long, unsigned char const*, long, int, int, short*) Unexecuted instantiation: put_qpel_0_2_fallback(short*, long, unsigned char const*, long, int, int, short*) Unexecuted instantiation: put_qpel_0_3_fallback(short*, long, unsigned char const*, long, int, int, short*) Unexecuted instantiation: put_qpel_1_0_fallback(short*, long, unsigned char const*, long, int, int, short*) Unexecuted instantiation: put_qpel_1_1_fallback(short*, long, unsigned char const*, long, int, int, short*) Unexecuted instantiation: put_qpel_1_2_fallback(short*, long, unsigned char const*, long, int, int, short*) Unexecuted instantiation: put_qpel_1_3_fallback(short*, long, unsigned char const*, long, int, int, short*) Unexecuted instantiation: put_qpel_2_0_fallback(short*, long, unsigned char const*, long, int, int, short*) Unexecuted instantiation: put_qpel_2_1_fallback(short*, long, unsigned char const*, long, int, int, short*) Unexecuted instantiation: put_qpel_2_2_fallback(short*, long, unsigned char const*, long, int, int, short*) Unexecuted instantiation: put_qpel_2_3_fallback(short*, long, unsigned char const*, long, int, int, short*) Unexecuted instantiation: put_qpel_3_0_fallback(short*, long, unsigned char const*, long, int, int, short*) Unexecuted instantiation: put_qpel_3_1_fallback(short*, long, unsigned char const*, long, int, int, short*) Unexecuted instantiation: put_qpel_3_2_fallback(short*, long, unsigned char const*, long, int, int, short*) Unexecuted instantiation: put_qpel_3_3_fallback(short*, long, unsigned char const*, long, int, int, short*) |
644 | | |
645 | | |
646 | | #define QPEL16(x,y) void put_qpel_ ## x ## _ ## y ## _fallback_16(int16_t *out, ptrdiff_t out_stride, \ |
647 | | const uint16_t *src, ptrdiff_t srcstride, \ |
648 | 0 | int nPbW, int nPbH, int16_t* mcbuffer, int bit_depth) \ |
649 | 0 | { put_qpel_fallback(out,out_stride, src,srcstride, nPbW,nPbH,mcbuffer,x,y, bit_depth ); }Unexecuted instantiation: put_qpel_0_1_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int) Unexecuted instantiation: put_qpel_0_2_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int) Unexecuted instantiation: put_qpel_0_3_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int) Unexecuted instantiation: put_qpel_1_0_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int) Unexecuted instantiation: put_qpel_1_1_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int) Unexecuted instantiation: put_qpel_1_2_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int) Unexecuted instantiation: put_qpel_1_3_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int) Unexecuted instantiation: put_qpel_2_0_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int) Unexecuted instantiation: put_qpel_2_1_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int) Unexecuted instantiation: put_qpel_2_2_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int) Unexecuted instantiation: put_qpel_2_3_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int) Unexecuted instantiation: put_qpel_3_0_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int) Unexecuted instantiation: put_qpel_3_1_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int) Unexecuted instantiation: put_qpel_3_2_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int) Unexecuted instantiation: put_qpel_3_3_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int) |
650 | | |
651 | | /* */ QPEL(0,1) QPEL(0,2) QPEL(0,3) |
652 | | QPEL(1,0) QPEL(1,1) QPEL(1,2) QPEL(1,3) |
653 | | QPEL(2,0) QPEL(2,1) QPEL(2,2) QPEL(2,3) |
654 | | QPEL(3,0) QPEL(3,1) QPEL(3,2) QPEL(3,3) |
655 | | |
656 | | /* */ QPEL16(0,1) QPEL16(0,2) QPEL16(0,3) |
657 | | QPEL16(1,0) QPEL16(1,1) QPEL16(1,2) QPEL16(1,3) |
658 | | QPEL16(2,0) QPEL16(2,1) QPEL16(2,2) QPEL16(2,3) |
659 | | QPEL16(3,0) QPEL16(3,1) QPEL16(3,2) QPEL16(3,3) |