Coverage Report

Created: 2022-08-24 06:15

/src/libde265/libde265/fallback-motion.cc
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * H.265 video codec.
3
 * Copyright (c) 2013-2014 struktur AG, Dirk Farin <farin@struktur.de>
4
 *
5
 * This file is part of libde265.
6
 *
7
 * libde265 is free software: you can redistribute it and/or modify
8
 * it under the terms of the GNU Lesser General Public License as
9
 * published by the Free Software Foundation, either version 3 of
10
 * the License, or (at your option) any later version.
11
 *
12
 * libde265 is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public License
18
 * along with libde265.  If not, see <http://www.gnu.org/licenses/>.
19
 */
20
21
#include "fallback-motion.h"
22
#include "util.h"
23
24
#if defined(_MSC_VER) || defined(__MINGW32__)
25
# include <malloc.h>
26
#elif defined(HAVE_ALLOCA_H)
27
# include <alloca.h>
28
#endif
29
30
#include <assert.h>
31
32
33
void put_unweighted_pred_8_fallback(uint8_t *dst, ptrdiff_t dststride,
34
                                    const int16_t *src, ptrdiff_t srcstride,
35
                                    int width, int height)
36
0
{
37
0
  int offset8bit = 32;
38
0
  int shift8bit = 6;
39
40
0
  assert((width&1)==0);
41
42
0
  for (int y=0;y<height;y++) {
43
0
    const int16_t* in  = &src[y*srcstride];
44
0
    uint8_t* out = &dst[y*dststride];
45
46
0
    for (int x=0;x<width;x+=2) {
47
0
      out[0] = Clip1_8bit((in[0] + offset8bit)>>shift8bit);
48
0
      out[1] = Clip1_8bit((in[1] + offset8bit)>>shift8bit);
49
0
      out+=2; in+=2;
50
0
    }
51
0
  }
52
0
}
53
54
55
void put_weighted_pred_8_fallback(uint8_t *dst, ptrdiff_t dststride,
56
                                  const int16_t *src, ptrdiff_t srcstride,
57
                                  int width, int height,
58
                                  int w,int o,int log2WD)
59
0
{
60
0
  assert(log2WD>=1); // TODO
61
62
0
  const int rnd = (1<<(log2WD-1));
63
64
0
  for (int y=0;y<height;y++) {
65
0
    const int16_t* in  = &src[y*srcstride];
66
0
    uint8_t* out = &dst[y*dststride];
67
68
0
    for (int x=0;x<width;x++) {
69
0
      out[0] = Clip1_8bit(((in[0]*w + rnd)>>log2WD) + o);
70
0
      out++; in++;
71
0
    }
72
0
  }
73
0
}
74
75
void put_weighted_bipred_8_fallback(uint8_t *dst, ptrdiff_t dststride,
76
                                    const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride,
77
                                    int width, int height,
78
                                    int w1,int o1, int w2,int o2, int log2WD)
79
0
{
80
0
  assert(log2WD>=1); // TODO
81
82
0
  const int rnd = ((o1+o2+1) << log2WD);
83
84
0
  for (int y=0;y<height;y++) {
85
0
    const int16_t* in1 = &src1[y*srcstride];
86
0
    const int16_t* in2 = &src2[y*srcstride];
87
0
    uint8_t* out = &dst[y*dststride];
88
89
0
    for (int x=0;x<width;x++) {
90
0
      out[0] = Clip1_8bit((in1[0]*w1 + in2[0]*w2 + rnd)>>(log2WD+1));
91
0
      out++; in1++; in2++;
92
0
    }
93
0
  }
94
0
}
95
96
97
void put_weighted_pred_avg_8_fallback(uint8_t *dst, ptrdiff_t dststride,
98
                                      const int16_t *src1, const int16_t *src2,
99
                                      ptrdiff_t srcstride, int width,
100
                                      int height)
101
0
{
102
0
  int offset8bit = 64;
103
0
  int shift8bit = 7;
104
105
0
  assert((width&1)==0);
106
107
  // I had a special case for 8-pixel parallel, unrolled code,
108
  // but I did not see any speedup.
109
110
#if 0
111
  for (int y=0;y<height;y++) {
112
    int16_t* in1 = &src1[y*srcstride];
113
    int16_t* in2 = &src2[y*srcstride];
114
    uint8_t* out = &dst[y*dststride];
115
116
    for (int x=0;x<width;x++) {
117
      out[0] = Clip1_8bit((in1[0] + in2[0] + offset8bit)>>shift8bit);
118
      out++; in1++; in2++;
119
    }
120
  }
121
#endif
122
123
#if 0
124
  if ((width&7)==0) {
125
    for (int y=0;y<height;y++) {
126
      int16_t* in1 = &src1[y*srcstride];
127
      int16_t* in2 = &src2[y*srcstride];
128
      uint8_t* out = &dst[y*dststride];
129
130
      for (int x=0;x<width;x+=8) {
131
        out[0] = Clip1_8bit((in1[0] + in2[0] + offset8bit)>>shift8bit);
132
        out[1] = Clip1_8bit((in1[1] + in2[1] + offset8bit)>>shift8bit);
133
        out[2] = Clip1_8bit((in1[2] + in2[2] + offset8bit)>>shift8bit);
134
        out[3] = Clip1_8bit((in1[3] + in2[3] + offset8bit)>>shift8bit);
135
        out[4] = Clip1_8bit((in1[4] + in2[4] + offset8bit)>>shift8bit);
136
        out[5] = Clip1_8bit((in1[5] + in2[5] + offset8bit)>>shift8bit);
137
        out[6] = Clip1_8bit((in1[6] + in2[6] + offset8bit)>>shift8bit);
138
        out[7] = Clip1_8bit((in1[7] + in2[7] + offset8bit)>>shift8bit);
139
        out+=8; in1+=8; in2+=8;
140
      }
141
    }
142
  }
143
  else
144
#endif
145
0
    {
146
0
      for (int y=0;y<height;y++) {
147
0
        const int16_t* in1 = &src1[y*srcstride];
148
0
        const int16_t* in2 = &src2[y*srcstride];
149
0
        uint8_t* out = &dst[y*dststride];
150
151
0
        for (int x=0;x<width;x+=2) {
152
0
          out[0] = Clip1_8bit((in1[0] + in2[0] + offset8bit)>>shift8bit);
153
0
          out[1] = Clip1_8bit((in1[1] + in2[1] + offset8bit)>>shift8bit);
154
0
          out+=2; in1+=2; in2+=2;
155
0
        }
156
0
      }
157
0
    }
158
0
}
159
160
161
162
163
164
void put_unweighted_pred_16_fallback(uint16_t *dst, ptrdiff_t dststride,
165
                                     const int16_t *src, ptrdiff_t srcstride,
166
                                     int width, int height, int bit_depth)
167
0
{
168
0
  int shift1 = 14-bit_depth;
169
0
  int offset1 = 0;
170
0
  if (shift1>0) { offset1 = 1<<(shift1-1); }
171
172
0
  assert((width&1)==0);
173
174
0
  for (int y=0;y<height;y++) {
175
0
    const int16_t* in  = &src[y*srcstride];
176
0
    uint16_t* out = &dst[y*dststride];
177
178
0
    for (int x=0;x<width;x+=2) {
179
0
      out[0] = Clip_BitDepth((in[0] + offset1)>>shift1, bit_depth);
180
0
      out[1] = Clip_BitDepth((in[1] + offset1)>>shift1, bit_depth);
181
0
      out+=2; in+=2;
182
0
    }
183
0
  }
184
0
}
185
186
#include <stdlib.h>
187
188
void put_weighted_pred_16_fallback(uint16_t *dst, ptrdiff_t dststride,
189
                                   const int16_t *src, ptrdiff_t srcstride,
190
                                   int width, int height,
191
                                   int w,int o,int log2WD, int bit_depth)
192
0
{
193
0
  assert(log2WD>=1); // TODO
194
195
0
  const int rnd = (1<<(log2WD-1));
196
197
0
  for (int y=0;y<height;y++) {
198
0
    const int16_t* in  = &src[y*srcstride];
199
0
    uint16_t* out = &dst[y*dststride];
200
201
0
    for (int x=0;x<width;x++) {
202
0
      out[0] = Clip_BitDepth(((in[0]*w + rnd)>>log2WD) + o, bit_depth);
203
0
      out++; in++;
204
0
    }
205
0
  }
206
0
}
207
208
void put_weighted_bipred_16_fallback(uint16_t *dst, ptrdiff_t dststride,
209
                                     const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride,
210
                                     int width, int height,
211
                                     int w1,int o1, int w2,int o2, int log2WD, int bit_depth)
212
0
{
213
0
  assert(log2WD>=1); // TODO
214
215
0
  const int rnd = ((o1+o2+1) << log2WD);
216
217
0
  for (int y=0;y<height;y++) {
218
0
    const int16_t* in1 = &src1[y*srcstride];
219
0
    const int16_t* in2 = &src2[y*srcstride];
220
0
    uint16_t* out = &dst[y*dststride];
221
222
0
    for (int x=0;x<width;x++) {
223
0
      out[0] = Clip_BitDepth((in1[0]*w1 + in2[0]*w2 + rnd)>>(log2WD+1), bit_depth);
224
0
      out++; in1++; in2++;
225
0
    }
226
0
  }
227
0
}
228
229
230
void put_weighted_pred_avg_16_fallback(uint16_t *dst, ptrdiff_t dststride,
231
                                       const int16_t *src1, const int16_t *src2,
232
                                       ptrdiff_t srcstride, int width,
233
                                       int height, int bit_depth)
234
0
{
235
0
  int shift2 = 15-bit_depth;
236
0
  int offset2 = 1<<(shift2-1);
237
238
0
  assert((width&1)==0);
239
240
0
  for (int y=0;y<height;y++) {
241
0
    const int16_t* in1 = &src1[y*srcstride];
242
0
    const int16_t* in2 = &src2[y*srcstride];
243
0
    uint16_t* out = &dst[y*dststride];
244
245
0
    for (int x=0;x<width;x+=2) {
246
0
      out[0] = Clip_BitDepth((in1[0] + in2[0] + offset2)>>shift2, bit_depth);
247
0
      out[1] = Clip_BitDepth((in1[1] + in2[1] + offset2)>>shift2, bit_depth);
248
0
      out+=2; in1+=2; in2+=2;
249
0
    }
250
0
  }
251
0
}
252
253
254
255
256
257
// Copies an 8-bit block into the 16-bit intermediate buffer without any
// filtering; each sample is scaled by a left shift of 6.
//
// out/out_stride  destination samples and row pitch (in elements)
// src/src_stride  source pixels and row pitch (in elements)
// width,height    block size in samples
// mx,my,mcbuffer  not used by this function
void put_epel_8_fallback(int16_t *out, ptrdiff_t out_stride,
                         const uint8_t *src, ptrdiff_t src_stride,
                         int width, int height,
                         int mx, int my, int16_t* mcbuffer)
{
  const int upshift = 6;

  for (int row=0; row<height; row++) {
    const uint8_t* srcRow = src + row*src_stride;
    int16_t*       dstRow = out + row*out_stride;

    for (int col=0; col<width; col++) {
      dstRow[col] = srcRow[col] << upshift;
    }
  }
}
275
276
277
// Copies a block of 16-bit pixels into the 16-bit intermediate buffer
// without any filtering; each sample is scaled by a left shift of
// (14 - bit_depth).
//
// out/out_stride  destination samples and row pitch (in elements)
// src/src_stride  source pixels and row pitch (in elements)
// width,height    block size in samples
// mx,my,mcbuffer  not used by this function
// bit_depth       bit depth of the source pixels
void put_epel_16_fallback(int16_t *out, ptrdiff_t out_stride,
                          const uint16_t *src, ptrdiff_t src_stride,
                          int width, int height,
                          int mx, int my, int16_t* mcbuffer, int bit_depth)
{
  const int upshift = 14 - bit_depth;

  for (int row=0; row<height; row++) {
    const uint16_t* srcRow = src + row*src_stride;
    int16_t*        dstRow = out + row*out_stride;

    for (int col=0; col<width; col++) {
      dstRow[col] = srcRow[col] << upshift;
    }
  }
}
295
296
297
template <class pixel_t>
298
void put_epel_hv_fallback(int16_t *dst, ptrdiff_t dst_stride,
299
                          const pixel_t *src, ptrdiff_t src_stride,
300
                          int nPbWC, int nPbHC,
301
                          int xFracC, int yFracC, int16_t* mcbuffer, int bit_depth)
302
0
{
303
0
  const int shift1 = bit_depth-8;
304
0
  const int shift2 = 6;
305
  //const int shift3 = 6;
306
307
0
  int extra_left = 1;
308
0
  int extra_top  = 1;
309
  //  int extra_right = 2;
310
0
  int extra_bottom= 2;
311
312
313
0
  int nPbH_extra = extra_top  + nPbHC + extra_bottom;
314
315
0
  int16_t* tmp2buf = (int16_t*)alloca( nPbWC      * nPbH_extra * sizeof(int16_t) );
316
317
  /*
318
  int nPbW_extra = extra_left + nPbWC + extra_right;
319
320
321
  printf("x,y FracC: %d/%d\n",xFracC,yFracC);
322
323
  printf("---IN---\n");
324
325
  for (int y=-extra_top;y<nPbHC+extra_bottom;y++) {
326
    uint8_t* p = &src[y*src_stride -extra_left];
327
328
    for (int x=-extra_left;x<nPbWC+extra_right;x++) {
329
      printf("%05d ",*p << 6);
330
      p++;
331
    }
332
    printf("\n");
333
  }
334
  */
335
336
337
  // H-filters
338
339
0
  logtrace(LogMotion,"---H---\n");
340
  //printf("---H---(%d)\n",xFracC);
341
342
0
  for (int y=-extra_top;y<nPbHC+extra_bottom;y++) {
343
0
    const pixel_t* p = &src[y*src_stride - extra_left];
344
345
0
    for (int x=0;x<nPbWC;x++) {
346
0
      int16_t v;
347
0
      switch (xFracC) {
348
0
      case 0: v = p[1]; break;
349
0
      case 1: v = (-2*p[0]+58*p[1]+10*p[2]-2*p[3])>>shift1; break;
350
0
      case 2: v = (-4*p[0]+54*p[1]+16*p[2]-2*p[3])>>shift1; break;
351
0
      case 3: v = (-6*p[0]+46*p[1]+28*p[2]-4*p[3])>>shift1; break;
352
0
      case 4: v = (-4*p[0]+36*p[1]+36*p[2]-4*p[3])>>shift1; break;
353
0
      case 5: v = (-4*p[0]+28*p[1]+46*p[2]-6*p[3])>>shift1; break;
354
0
      case 6: v = (-2*p[0]+16*p[1]+54*p[2]-4*p[3])>>shift1; break;
355
0
      default:
356
0
      case 7: v = (-2*p[0]+10*p[1]+58*p[2]-2*p[3])>>shift1; break;
357
0
      }
358
359
      //printf("%d %d %d %d -> %d\n",p[0],p[1],p[2],p[3],v);
360
361
0
      tmp2buf[y+extra_top + x*nPbH_extra] = v;
362
0
      p++;
363
364
      //printf("%05d ",tmp2buf[y+extra_top + x*nPbH_extra]);
365
0
    }
366
    //printf("\n");
367
0
  }
368
369
  // V-filters
370
371
0
  int vshift = (xFracC==0 ? shift1 : shift2);
372
373
0
  for (int x=0;x<nPbWC;x++) {
374
0
    int16_t* p = &tmp2buf[x*nPbH_extra];
375
376
0
    for (int y=0;y<nPbHC;y++) {
377
0
      int16_t v;
378
      //logtrace(LogMotion,"%x %x %x  %x  %x %x %x\n",p[0],p[1],p[2],p[3],p[4],p[5],p[6]);
379
380
0
      switch (yFracC) {
381
0
      case 0: v = p[1]; break;
382
0
      case 1: v = (-2*p[0]+58*p[1]+10*p[2]-2*p[3])>>vshift; break;
383
0
      case 2: v = (-4*p[0]+54*p[1]+16*p[2]-2*p[3])>>vshift; break;
384
0
      case 3: v = (-6*p[0]+46*p[1]+28*p[2]-4*p[3])>>vshift; break;
385
0
      case 4: v = (-4*p[0]+36*p[1]+36*p[2]-4*p[3])>>vshift; break;
386
0
      case 5: v = (-4*p[0]+28*p[1]+46*p[2]-6*p[3])>>vshift; break;
387
0
      case 6: v = (-2*p[0]+16*p[1]+54*p[2]-4*p[3])>>vshift; break;
388
0
      default:
389
0
      case 7: v = (-2*p[0]+10*p[1]+58*p[2]-2*p[3])>>vshift; break;
390
0
      }
391
392
0
      dst[x + y*dst_stride] = v;
393
0
      p++;
394
0
    }
395
396
0
  }
397
398
  /*
399
  printf("---V---\n");
400
  for (int y=0;y<nPbHC;y++) {
401
    for (int x=0;x<nPbWC;x++) {
402
      printf("%05d ",dst[x+y*dst_stride]);
403
    }
404
    printf("\n");
405
  }
406
  */
407
0
}
Unexecuted instantiation: void put_epel_hv_fallback<unsigned char>(short*, long, unsigned char const*, long, int, int, int, int, short*, int)
Unexecuted instantiation: void put_epel_hv_fallback<unsigned short>(short*, long, unsigned short const*, long, int, int, int, int, short*, int)
408
409
410
template
411
void put_epel_hv_fallback<uint8_t>(int16_t *dst, ptrdiff_t dst_stride,
412
                                   const uint8_t *src, ptrdiff_t src_stride,
413
                                   int nPbWC, int nPbHC,
414
                                   int xFracC, int yFracC, int16_t* mcbuffer, int bit_depth);
415
template
416
void put_epel_hv_fallback<uint16_t>(int16_t *dst, ptrdiff_t dst_stride,
417
                                    const uint16_t *src, ptrdiff_t src_stride,
418
                                    int nPbWC, int nPbHC,
419
                                    int xFracC, int yFracC, int16_t* mcbuffer, int bit_depth);
420
421
422
423
// Straight copy of an 8-bit block into the 16-bit intermediate buffer
// (no filtering); each sample is scaled by a left shift of 6.
//
// Samples are processed one at a time so that exactly nPbW samples per row
// are read and written, whatever the width. (A 4-samples-per-iteration
// loop would touch up to three samples beyond the block for widths that
// are not a multiple of 4.)
//
// out/out_stride  destination samples and row pitch (in elements)
// src/srcstride   source pixels and row pitch (in elements)
// nPbW,nPbH       block size in samples
// mcbuffer        not used by this function
void put_qpel_0_0_fallback(int16_t *out, ptrdiff_t out_stride,
                           const uint8_t *src, ptrdiff_t srcstride,
                           int nPbW, int nPbH, int16_t* mcbuffer)
{
  const int shift2 = 6;

  for (int y=0;y<nPbH;y++) {
    const uint8_t* p = src + srcstride*y;
    int16_t* o = out + out_stride*y;

    for (int x=0;x<nPbW;x++) {
      o[x] = p[x] << shift2;
    }
  }
}
454
455
456
// Straight copy of a block of 16-bit pixels into the 16-bit intermediate
// buffer (no filtering); each sample is scaled by a left shift of
// (14 - bit_depth).
//
// out/out_stride  destination samples and row pitch (in elements)
// src/srcstride   source pixels and row pitch (in elements)
// nPbW,nPbH       block size in samples
// mcbuffer        not used by this function
// bit_depth       bit depth of the source pixels
void put_qpel_0_0_fallback_16(int16_t *out, ptrdiff_t out_stride,
                              const uint16_t *src, ptrdiff_t srcstride,
                              int nPbW, int nPbH, int16_t* mcbuffer, int bit_depth)
{
  const int upshift = 14-bit_depth;

  for (int row=0; row<nPbH; row++) {
    const uint16_t* srcRow = src + srcstride*row;
    int16_t*        dstRow = out + out_stride*row;

    for (int col=0; col<nPbW; col++) {
      dstRow[col] = srcRow[col] << upshift;
    }
  }
}
475
476
477
478
// Number of additional samples the quarter-sample filters need before
// (left / above) and after (right / below) the block, indexed by the
// fractional position 0..3. These are read-only lookup tables.
static const int extra_before[4] = { 0,3,3,2 };
static const int extra_after [4] = { 0,3,4,4 };
480
481
template <class pixel_t>
482
void put_qpel_fallback(int16_t *out, ptrdiff_t out_stride,
483
                       const pixel_t *src, ptrdiff_t srcstride,
484
                       int nPbW, int nPbH, int16_t* mcbuffer,
485
                       int xFracL, int yFracL, int bit_depth)
486
0
{
487
0
  int extra_left   = extra_before[xFracL];
488
  //int extra_right  = extra_after [xFracL];
489
0
  int extra_top    = extra_before[yFracL];
490
0
  int extra_bottom = extra_after [yFracL];
491
492
  //int nPbW_extra = extra_left + nPbW + extra_right;
493
0
  int nPbH_extra = extra_top  + nPbH + extra_bottom;
494
495
0
  const int shift1 = bit_depth-8;
496
0
  const int shift2 = 6;
497
498
499
  // H-filters
500
501
0
  switch (xFracL) {
502
0
  case 0:
503
0
    for (int y=-extra_top;y<nPbH+extra_bottom;y++) {
504
0
      const pixel_t* p = src + srcstride*y - extra_left;
505
0
      int16_t* o = &mcbuffer[y+extra_top];
506
507
0
      for (int x=0;x<nPbW;x++) {
508
0
        *o = *p;
509
0
        o += nPbH_extra;
510
0
        p++;
511
0
      }
512
0
    }
513
0
    break;
514
0
  case 1:
515
0
    for (int y=-extra_top;y<nPbH+extra_bottom;y++) {
516
0
      const pixel_t* p = src + srcstride*y - extra_left;
517
0
      int16_t* o = &mcbuffer[y+extra_top];
518
519
0
      for (int x=0;x<nPbW;x++) {
520
0
        *o = (-p[0]+4*p[1]-10*p[2]+58*p[3]+17*p[4] -5*p[5]  +p[6])>>shift1;
521
0
        o += nPbH_extra;
522
0
        p++;
523
0
      }
524
0
    }
525
0
    break;
526
0
  case 2:
527
0
    for (int y=-extra_top;y<nPbH+extra_bottom;y++) {
528
0
      const pixel_t* p = src + srcstride*y - extra_left;
529
0
      int16_t* o = &mcbuffer[y+extra_top];
530
531
0
      for (int x=0;x<nPbW;x++) {
532
0
        *o = (-p[0]+4*p[1]-11*p[2]+40*p[3]+40*p[4]-11*p[5]+4*p[6]-p[7])>>shift1;
533
0
        o += nPbH_extra;
534
0
        p++;
535
0
      }
536
0
    }
537
0
    break;
538
0
  case 3:
539
0
    for (int y=-extra_top;y<nPbH+extra_bottom;y++) {
540
0
      const pixel_t* p = src + srcstride*y - extra_left;
541
0
      int16_t* o = &mcbuffer[y+extra_top];
542
543
0
      for (int x=0;x<nPbW;x++) {
544
0
        *o = ( p[0]-5*p[1]+17*p[2]+58*p[3]-10*p[4] +4*p[5]  -p[6])>>shift1;
545
0
        o += nPbH_extra;
546
0
        p++;
547
0
      }
548
0
    }
549
0
    break;
550
0
  }
551
552
553
0
  logtrace(LogMotion,"---H---\n");
554
555
0
  for (int y=-extra_top;y<nPbH+extra_bottom;y++) {
556
0
    for (int x=0;x<nPbW;x++) {
557
0
      logtrace(LogMotion,"%04x ",mcbuffer[y+extra_top + x*nPbH_extra]);
558
0
    }
559
0
    logtrace(LogMotion,"\n");
560
0
  }
561
562
  // V-filters
563
564
0
  int vshift = (xFracL==0 ? shift1 : shift2);
565
566
0
  switch (yFracL) {
567
0
  case 0:
568
0
    for (int x=0;x<nPbW;x++) {
569
0
      const int16_t* p = &mcbuffer[x*nPbH_extra];
570
0
      int16_t* o = &out[x];
571
572
0
      for (int y=0;y<nPbH;y++) {
573
0
        *o = *p;
574
0
        o+=out_stride;
575
0
        p++;
576
0
      }
577
0
    }
578
0
    break;
579
0
  case 1:
580
0
    for (int x=0;x<nPbW;x++) {
581
0
      const int16_t* p = &mcbuffer[x*nPbH_extra];
582
0
      int16_t* o = &out[x];
583
584
0
      for (int y=0;y<nPbH;y++) {
585
0
        *o = (-p[0]+4*p[1]-10*p[2]+58*p[3]+17*p[4] -5*p[5]  +p[6])>>vshift;
586
0
        o+=out_stride;
587
0
        p++;
588
0
      }
589
0
    }
590
0
    break;
591
0
  case 2:
592
0
    for (int x=0;x<nPbW;x++) {
593
0
      const int16_t* p = &mcbuffer[x*nPbH_extra];
594
0
      int16_t* o = &out[x];
595
596
0
      for (int y=0;y<nPbH;y++) {
597
0
        *o = (-p[0]+4*p[1]-11*p[2]+40*p[3]+40*p[4]-11*p[5]+4*p[6]-p[7])>>vshift;
598
0
        o+=out_stride;
599
0
        p++;
600
0
      }
601
0
    }
602
0
    break;
603
0
  case 3:
604
0
    for (int x=0;x<nPbW;x++) {
605
0
      const int16_t* p = &mcbuffer[x*nPbH_extra];
606
0
      int16_t* o = &out[x];
607
608
0
      for (int y=0;y<nPbH;y++) {
609
0
        *o = ( p[0]-5*p[1]+17*p[2]+58*p[3]-10*p[4] +4*p[5]  -p[6])>>vshift;
610
0
        o+=out_stride;
611
0
        p++;
612
0
      }
613
0
    }
614
0
    break;
615
0
  }
616
617
618
0
  logtrace(LogMotion,"---V---\n");
619
0
  for (int y=0;y<nPbH;y++) {
620
0
    for (int x=0;x<nPbW;x++) {
621
0
      logtrace(LogMotion,"%04x ",out[x+y*out_stride]);
622
0
    }
623
0
    logtrace(LogMotion,"\n");
624
0
  }
625
0
}
Unexecuted instantiation: void put_qpel_fallback<unsigned char>(short*, long, unsigned char const*, long, int, int, short*, int, int, int)
Unexecuted instantiation: void put_qpel_fallback<unsigned short>(short*, long, unsigned short const*, long, int, int, short*, int, int, int)
626
627
628
629
// QPEL(x,y) defines put_qpel_<x>_<y>_fallback(), an entry point for 8-bit
// pixels that forwards to put_qpel_fallback() with the fractional position
// (x,y) and a fixed bit depth of 8.
#define QPEL(x,y)                                                                 \
  void put_qpel_ ## x ## _ ## y ## _fallback(int16_t *out, ptrdiff_t out_stride,  \
                                             const uint8_t *src, ptrdiff_t srcstride, \
                                             int nPbW, int nPbH, int16_t* mcbuffer) \
  {                                                                               \
    put_qpel_fallback(out, out_stride, src, srcstride,                            \
                      nPbW, nPbH, mcbuffer, x, y, 8);                             \
  }
Unexecuted instantiation: put_qpel_0_1_fallback(short*, long, unsigned char const*, long, int, int, short*)
Unexecuted instantiation: put_qpel_0_2_fallback(short*, long, unsigned char const*, long, int, int, short*)
Unexecuted instantiation: put_qpel_0_3_fallback(short*, long, unsigned char const*, long, int, int, short*)
Unexecuted instantiation: put_qpel_1_0_fallback(short*, long, unsigned char const*, long, int, int, short*)
Unexecuted instantiation: put_qpel_1_1_fallback(short*, long, unsigned char const*, long, int, int, short*)
Unexecuted instantiation: put_qpel_1_2_fallback(short*, long, unsigned char const*, long, int, int, short*)
Unexecuted instantiation: put_qpel_1_3_fallback(short*, long, unsigned char const*, long, int, int, short*)
Unexecuted instantiation: put_qpel_2_0_fallback(short*, long, unsigned char const*, long, int, int, short*)
Unexecuted instantiation: put_qpel_2_1_fallback(short*, long, unsigned char const*, long, int, int, short*)
Unexecuted instantiation: put_qpel_2_2_fallback(short*, long, unsigned char const*, long, int, int, short*)
Unexecuted instantiation: put_qpel_2_3_fallback(short*, long, unsigned char const*, long, int, int, short*)
Unexecuted instantiation: put_qpel_3_0_fallback(short*, long, unsigned char const*, long, int, int, short*)
Unexecuted instantiation: put_qpel_3_1_fallback(short*, long, unsigned char const*, long, int, int, short*)
Unexecuted instantiation: put_qpel_3_2_fallback(short*, long, unsigned char const*, long, int, int, short*)
Unexecuted instantiation: put_qpel_3_3_fallback(short*, long, unsigned char const*, long, int, int, short*)
633
634
635
// QPEL16(x,y) defines put_qpel_<x>_<y>_fallback_16(), an entry point for
// 16-bit pixel storage that forwards to put_qpel_fallback() with the
// fractional position (x,y) and the caller-supplied bit depth.
#define QPEL16(x,y)                                                                  \
  void put_qpel_ ## x ## _ ## y ## _fallback_16(int16_t *out, ptrdiff_t out_stride,  \
                                                const uint16_t *src, ptrdiff_t srcstride, \
                                                int nPbW, int nPbH, int16_t* mcbuffer, \
                                                int bit_depth)                       \
  {                                                                                  \
    put_qpel_fallback(out, out_stride, src, srcstride,                               \
                      nPbW, nPbH, mcbuffer, x, y, bit_depth);                        \
  }
Unexecuted instantiation: put_qpel_0_1_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int)
Unexecuted instantiation: put_qpel_0_2_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int)
Unexecuted instantiation: put_qpel_0_3_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int)
Unexecuted instantiation: put_qpel_1_0_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int)
Unexecuted instantiation: put_qpel_1_1_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int)
Unexecuted instantiation: put_qpel_1_2_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int)
Unexecuted instantiation: put_qpel_1_3_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int)
Unexecuted instantiation: put_qpel_2_0_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int)
Unexecuted instantiation: put_qpel_2_1_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int)
Unexecuted instantiation: put_qpel_2_2_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int)
Unexecuted instantiation: put_qpel_2_3_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int)
Unexecuted instantiation: put_qpel_3_0_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int)
Unexecuted instantiation: put_qpel_3_1_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int)
Unexecuted instantiation: put_qpel_3_2_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int)
Unexecuted instantiation: put_qpel_3_3_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int)
639
640
// 8-bit entry points for every fractional position except (0,0), which is
// not generated from the macro (put_qpel_0_0_fallback is written out
// separately).
QPEL(0,1)
QPEL(0,2)
QPEL(0,3)
QPEL(1,0)
QPEL(1,1)
QPEL(1,2)
QPEL(1,3)
QPEL(2,0)
QPEL(2,1)
QPEL(2,2)
QPEL(2,3)
QPEL(3,0)
QPEL(3,1)
QPEL(3,2)
QPEL(3,3)

// 16-bit entry points for every fractional position except (0,0), which is
// not generated from the macro (put_qpel_0_0_fallback_16 is written out
// separately).
QPEL16(0,1)
QPEL16(0,2)
QPEL16(0,3)
QPEL16(1,0)
QPEL16(1,1)
QPEL16(1,2)
QPEL16(1,3)
QPEL16(2,0)
QPEL16(2,1)
QPEL16(2,2)
QPEL16(2,3)
QPEL16(3,0)
QPEL16(3,1)
QPEL16(3,2)
QPEL16(3,3)