Coverage Report

Created: 2026-06-15 06:22

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/libde265/libde265/fallback-motion.cc
Line
Count
Source
1
/*
2
 * H.265 video codec.
3
 * Copyright (c) 2013-2014 struktur AG, Dirk Farin <farin@struktur.de>
4
 *
5
 * This file is part of libde265.
6
 *
7
 * libde265 is free software: you can redistribute it and/or modify
8
 * it under the terms of the GNU Lesser General Public License as
9
 * published by the Free Software Foundation, either version 3 of
10
 * the License, or (at your option) any later version.
11
 *
12
 * libde265 is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public License
18
 * along with libde265.  If not, see <http://www.gnu.org/licenses/>.
19
 */
20
21
#include "fallback-motion.h"
22
#include "util.h"
23
24
#if defined(_MSC_VER) || defined(__MINGW32__)
25
# include <malloc.h>
26
#elif defined(HAVE_ALLOCA_H)
27
# include <alloca.h>
28
#endif
29
30
#include <assert.h>
31
32
33
void put_unweighted_pred_8_fallback(uint8_t *dst, ptrdiff_t dststride,
34
                                    const int16_t *src, ptrdiff_t srcstride,
35
                                    int width, int height)
36
0
{
37
0
  int offset8bit = 32;
38
0
  int shift8bit = 6;
39
40
0
  assert((width&1)==0);
41
42
0
  for (int y=0;y<height;y++) {
43
0
    const int16_t* in  = &src[y*srcstride];
44
0
    uint8_t* out = &dst[y*dststride];
45
46
0
    for (int x=0;x<width;x+=2) {
47
0
      out[0] = Clip1_8bit((in[0] + offset8bit)>>shift8bit);
48
0
      out[1] = Clip1_8bit((in[1] + offset8bit)>>shift8bit);
49
0
      out+=2; in+=2;
50
0
    }
51
0
  }
52
0
}
53
54
55
void put_weighted_pred_8_fallback(uint8_t *dst, ptrdiff_t dststride,
56
                                  const int16_t *src, ptrdiff_t srcstride,
57
                                  int width, int height,
58
                                  int w,int o,int log2WD)
59
0
{
60
0
  assert(log2WD>=1); // TODO
61
62
0
  const int rnd = (1<<(log2WD-1));
63
64
0
  for (int y=0;y<height;y++) {
65
0
    const int16_t* in  = &src[y*srcstride];
66
0
    uint8_t* out = &dst[y*dststride];
67
68
0
    for (int x=0;x<width;x++) {
69
0
      out[0] = Clip1_8bit(((in[0]*w + rnd)>>log2WD) + o);
70
0
      out++; in++;
71
0
    }
72
0
  }
73
0
}
74
75
void put_weighted_bipred_8_fallback(uint8_t *dst, ptrdiff_t dststride,
76
                                    const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride,
77
                                    int width, int height,
78
                                    int w1,int o1, int w2,int o2, int log2WD)
79
0
{
80
0
  assert(log2WD>=1); // TODO
81
82
0
  const int rnd = static_cast<int>(static_cast<unsigned int>(o1+o2+1) << log2WD);
83
84
0
  for (int y=0;y<height;y++) {
85
0
    const int16_t* in1 = &src1[y*srcstride];
86
0
    const int16_t* in2 = &src2[y*srcstride];
87
0
    uint8_t* out = &dst[y*dststride];
88
89
0
    for (int x=0;x<width;x++) {
90
0
      out[0] = Clip1_8bit((in1[0]*w1 + in2[0]*w2 + rnd)>>(log2WD+1));
91
0
      out++; in1++; in2++;
92
0
    }
93
0
  }
94
0
}
95
96
97
void put_weighted_pred_avg_8_fallback(uint8_t *dst, ptrdiff_t dststride,
98
                                      const int16_t *src1, const int16_t *src2,
99
                                      ptrdiff_t srcstride, int width,
100
                                      int height)
101
0
{
102
0
  int offset8bit = 64;
103
0
  int shift8bit = 7;
104
105
0
  assert((width&1)==0);
106
107
  // I had a special case for 8-pixel parallel, unrolled code,
108
  // but I did not see any speedup.
109
110
#if 0
111
  for (int y=0;y<height;y++) {
112
    int16_t* in1 = &src1[y*srcstride];
113
    int16_t* in2 = &src2[y*srcstride];
114
    uint8_t* out = &dst[y*dststride];
115
116
    for (int x=0;x<width;x++) {
117
      out[0] = Clip1_8bit((in1[0] + in2[0] + offset8bit)>>shift8bit);
118
      out++; in1++; in2++;
119
    }
120
  }
121
#endif
122
123
#if 0
124
  if ((width&7)==0) {
125
    for (int y=0;y<height;y++) {
126
      int16_t* in1 = &src1[y*srcstride];
127
      int16_t* in2 = &src2[y*srcstride];
128
      uint8_t* out = &dst[y*dststride];
129
130
      for (int x=0;x<width;x+=8) {
131
        out[0] = Clip1_8bit((in1[0] + in2[0] + offset8bit)>>shift8bit);
132
        out[1] = Clip1_8bit((in1[1] + in2[1] + offset8bit)>>shift8bit);
133
        out[2] = Clip1_8bit((in1[2] + in2[2] + offset8bit)>>shift8bit);
134
        out[3] = Clip1_8bit((in1[3] + in2[3] + offset8bit)>>shift8bit);
135
        out[4] = Clip1_8bit((in1[4] + in2[4] + offset8bit)>>shift8bit);
136
        out[5] = Clip1_8bit((in1[5] + in2[5] + offset8bit)>>shift8bit);
137
        out[6] = Clip1_8bit((in1[6] + in2[6] + offset8bit)>>shift8bit);
138
        out[7] = Clip1_8bit((in1[7] + in2[7] + offset8bit)>>shift8bit);
139
        out+=8; in1+=8; in2+=8;
140
      }
141
    }
142
  }
143
  else
144
#endif
145
0
    {
146
0
      for (int y=0;y<height;y++) {
147
0
        const int16_t* in1 = &src1[y*srcstride];
148
0
        const int16_t* in2 = &src2[y*srcstride];
149
0
        uint8_t* out = &dst[y*dststride];
150
151
0
        for (int x=0;x<width;x+=2) {
152
0
          out[0] = Clip1_8bit((in1[0] + in2[0] + offset8bit)>>shift8bit);
153
0
          out[1] = Clip1_8bit((in1[1] + in2[1] + offset8bit)>>shift8bit);
154
0
          out+=2; in1+=2; in2+=2;
155
0
        }
156
0
      }
157
0
    }
158
0
}
159
160
161
162
163
164
void put_unweighted_pred_16_fallback(uint16_t *dst, ptrdiff_t dststride,
165
                                     const int16_t *src, ptrdiff_t srcstride,
166
                                     int width, int height, int bit_depth)
167
0
{
168
  // shift1 per HEVC v2 (10/2014) spec 8.5.3.3.4.2: Max(2, 14 - BitDepth).
169
  // The Max() was added with the Range Extensions in v2 to handle BitDepth up to 16;
170
  // the v1 (04/2013) formula was just (14 - BitDepth), valid only for BitDepth <= 14.
171
0
  int shift1 = std::max(2, 14-bit_depth);
172
0
  int offset1 = 1<<(shift1-1);
173
174
0
  assert((width&1)==0);
175
176
0
  for (int y=0;y<height;y++) {
177
0
    const int16_t* in  = &src[y*srcstride];
178
0
    uint16_t* out = &dst[y*dststride];
179
180
0
    for (int x=0;x<width;x+=2) {
181
0
      out[0] = Clip_BitDepth((in[0] + offset1)>>shift1, bit_depth);
182
0
      out[1] = Clip_BitDepth((in[1] + offset1)>>shift1, bit_depth);
183
0
      out+=2; in+=2;
184
0
    }
185
0
  }
186
0
}
187
188
#include <stdlib.h>
189
190
void put_weighted_pred_16_fallback(uint16_t *dst, ptrdiff_t dststride,
191
                                   const int16_t *src, ptrdiff_t srcstride,
192
                                   int width, int height,
193
                                   int w,int o,int log2WD, int bit_depth)
194
0
{
195
0
  assert(log2WD>=1); // TODO
196
197
0
  const int rnd = (1<<(log2WD-1));
198
199
0
  for (int y=0;y<height;y++) {
200
0
    const int16_t* in  = &src[y*srcstride];
201
0
    uint16_t* out = &dst[y*dststride];
202
203
0
    for (int x=0;x<width;x++) {
204
0
      out[0] = Clip_BitDepth(((in[0]*w + rnd)>>log2WD) + o, bit_depth);
205
0
      out++; in++;
206
0
    }
207
0
  }
208
0
}
209
210
void put_weighted_bipred_16_fallback(uint16_t *dst, ptrdiff_t dststride,
211
                                     const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride,
212
                                     int width, int height,
213
                                     int w1,int o1, int w2,int o2, int log2WD, int bit_depth)
214
0
{
215
0
  assert(log2WD>=1); // TODO
216
217
0
  const int rnd = static_cast<int>(static_cast<unsigned int>(o1+o2+1) << log2WD);
218
219
0
  for (int y=0;y<height;y++) {
220
0
    const int16_t* in1 = &src1[y*srcstride];
221
0
    const int16_t* in2 = &src2[y*srcstride];
222
0
    uint16_t* out = &dst[y*dststride];
223
224
0
    for (int x=0;x<width;x++) {
225
0
      out[0] = Clip_BitDepth((in1[0]*w1 + in2[0]*w2 + rnd)>>(log2WD+1), bit_depth);
226
0
      out++; in1++; in2++;
227
0
    }
228
0
  }
229
0
}
230
231
232
void put_weighted_pred_avg_16_fallback(uint16_t *dst, ptrdiff_t dststride,
233
                                       const int16_t *src1, const int16_t *src2,
234
                                       ptrdiff_t srcstride, int width,
235
                                       int height, int bit_depth)
236
0
{
237
  // shift2 per HEVC v2 (10/2014) spec 8.5.3.3.4.2: Max(3, 15 - BitDepth).
238
  // The Max() was added with the Range Extensions in v2 to handle BitDepth up to 16;
239
  // the v1 (04/2013) formula was just (15 - BitDepth), valid only for BitDepth <= 14.
240
0
  int shift2 = std::max(3, 15-bit_depth);
241
0
  int offset2 = 1<<(shift2-1);
242
243
0
  assert((width&1)==0);
244
245
0
  for (int y=0;y<height;y++) {
246
0
    const int16_t* in1 = &src1[y*srcstride];
247
0
    const int16_t* in2 = &src2[y*srcstride];
248
0
    uint16_t* out = &dst[y*dststride];
249
250
0
    for (int x=0;x<width;x+=2) {
251
0
      out[0] = Clip_BitDepth((in1[0] + in2[0] + offset2)>>shift2, bit_depth);
252
0
      out[1] = Clip_BitDepth((in1[1] + in2[1] + offset2)>>shift2, bit_depth);
253
0
      out+=2; in1+=2; in2+=2;
254
0
    }
255
0
  }
256
0
}
257
258
259
260
261
262
// Full-sample copy for 8-bit input: every source pixel is written to the
// 16-bit output scaled by a left shift of 6.
//
//   out, out_stride : output samples and row distance
//   src, src_stride : input pixels and row distance
//   width, height   : block size
//   mx, my, mcbuffer: not used by this function
void put_epel_8_fallback(int16_t *out, ptrdiff_t out_stride,
                         const uint8_t *src, ptrdiff_t src_stride,
                         int width, int height,
                         int mx, int my, int16_t* mcbuffer)
{
  const int upshift = 6;

  for (int row=0; row<height; row++) {
    const uint8_t* srcRow = src + row*src_stride;
    int16_t*       dstRow = out + row*out_stride;

    for (int col=0; col<width; col++) {
      dstRow[col] = srcRow[col] << upshift;
    }
  }
}
280
281
282
// Full-sample copy for high-bit-depth input: every source pixel is written to
// the 16-bit output scaled by a left shift that depends on bit_depth.
//
//   out, out_stride : output samples and row distance
//   src, src_stride : input pixels and row distance
//   width, height   : block size
//   mx, my, mcbuffer: not used by this function
//   bit_depth       : used to derive the shift
void put_epel_16_fallback(int16_t *out, ptrdiff_t out_stride,
                          const uint16_t *src, ptrdiff_t src_stride,
                          int width, int height,
                          int mx, int my, int16_t* mcbuffer, int bit_depth)
{
  // HEVC v2 (10/2014), 8.5.3.3.3.3 (chroma): shift3 = Max(2, 14 - BitDepth).
  // The Max() came with the Range Extensions (BitDepth up to 16); the v1 (04/2013)
  // formula was plain (14 - BitDepth), which is only valid for BitDepth <= 14.
  int upshift = 14 - bit_depth;
  if (upshift < 2) {
    upshift = 2;
  }

  for (int row=0; row<height; row++) {
    const uint16_t* srcRow = src + row*src_stride;
    int16_t*        dstRow = out + row*out_stride;

    for (int col=0; col<width; col++) {
      dstRow[col] = srcRow[col] << upshift;
    }
  }
}
303
304
305
template <class pixel_t>
306
void put_epel_hv_fallback(int16_t *dst, ptrdiff_t dst_stride,
307
                          const pixel_t *src, ptrdiff_t src_stride,
308
                          int nPbWC, int nPbHC,
309
                          int xFracC, int yFracC, int16_t* mcbuffer, int bit_depth)
310
0
{
311
0
  const int shift1 = bit_depth-8;
312
0
  const int shift2 = 6;
313
  //const int shift3 = 6;
314
315
0
  int extra_left = 1;
316
0
  int extra_top  = 1;
317
  //  int extra_right = 2;
318
0
  int extra_bottom= 2;
319
320
321
0
  int nPbH_extra = extra_top  + nPbHC + extra_bottom;
322
323
0
  int16_t* tmp2buf = (int16_t*)alloca( nPbWC      * nPbH_extra * sizeof(int16_t) );
324
325
  /*
326
  int nPbW_extra = extra_left + nPbWC + extra_right;
327
328
329
  printf("x,y FracC: %d/%d\n",xFracC,yFracC);
330
331
  printf("---IN---\n");
332
333
  for (int y=-extra_top;y<nPbHC+extra_bottom;y++) {
334
    uint8_t* p = &src[y*src_stride -extra_left];
335
336
    for (int x=-extra_left;x<nPbWC+extra_right;x++) {
337
      printf("%05d ",*p << 6);
338
      p++;
339
    }
340
    printf("\n");
341
  }
342
  */
343
344
345
  // H-filters
346
347
0
  logtrace(LogMotion,"---H---\n");
348
  //printf("---H---(%d)\n",xFracC);
349
350
0
  for (int y=-extra_top;y<nPbHC+extra_bottom;y++) {
351
0
    const pixel_t* p = &src[y*src_stride - extra_left];
352
353
0
    for (int x=0;x<nPbWC;x++) {
354
0
      int16_t v;
355
0
      switch (xFracC) {
356
0
      case 0: v = p[1]; break;
357
0
      case 1: v = (-2*p[0]+58*p[1]+10*p[2]-2*p[3])>>shift1; break;
358
0
      case 2: v = (-4*p[0]+54*p[1]+16*p[2]-2*p[3])>>shift1; break;
359
0
      case 3: v = (-6*p[0]+46*p[1]+28*p[2]-4*p[3])>>shift1; break;
360
0
      case 4: v = (-4*p[0]+36*p[1]+36*p[2]-4*p[3])>>shift1; break;
361
0
      case 5: v = (-4*p[0]+28*p[1]+46*p[2]-6*p[3])>>shift1; break;
362
0
      case 6: v = (-2*p[0]+16*p[1]+54*p[2]-4*p[3])>>shift1; break;
363
0
      default:
364
0
      case 7: v = (-2*p[0]+10*p[1]+58*p[2]-2*p[3])>>shift1; break;
365
0
      }
366
367
      //printf("%d %d %d %d -> %d\n",p[0],p[1],p[2],p[3],v);
368
369
0
      tmp2buf[y+extra_top + x*nPbH_extra] = v;
370
0
      p++;
371
372
      //printf("%05d ",tmp2buf[y+extra_top + x*nPbH_extra]);
373
0
    }
374
    //printf("\n");
375
0
  }
376
377
  // V-filters
378
379
0
  int vshift = (xFracC==0 ? shift1 : shift2);
380
381
0
  for (int x=0;x<nPbWC;x++) {
382
0
    int16_t* p = &tmp2buf[x*nPbH_extra];
383
384
0
    for (int y=0;y<nPbHC;y++) {
385
0
      int16_t v;
386
      //logtrace(LogMotion,"%x %x %x  %x  %x %x %x\n",p[0],p[1],p[2],p[3],p[4],p[5],p[6]);
387
388
0
      switch (yFracC) {
389
0
      case 0: v = p[1]; break;
390
0
      case 1: v = (-2*p[0]+58*p[1]+10*p[2]-2*p[3])>>vshift; break;
391
0
      case 2: v = (-4*p[0]+54*p[1]+16*p[2]-2*p[3])>>vshift; break;
392
0
      case 3: v = (-6*p[0]+46*p[1]+28*p[2]-4*p[3])>>vshift; break;
393
0
      case 4: v = (-4*p[0]+36*p[1]+36*p[2]-4*p[3])>>vshift; break;
394
0
      case 5: v = (-4*p[0]+28*p[1]+46*p[2]-6*p[3])>>vshift; break;
395
0
      case 6: v = (-2*p[0]+16*p[1]+54*p[2]-4*p[3])>>vshift; break;
396
0
      default:
397
0
      case 7: v = (-2*p[0]+10*p[1]+58*p[2]-2*p[3])>>vshift; break;
398
0
      }
399
400
0
      dst[x + y*dst_stride] = v;
401
0
      p++;
402
0
    }
403
404
0
  }
405
406
  /*
407
  printf("---V---\n");
408
  for (int y=0;y<nPbHC;y++) {
409
    for (int x=0;x<nPbWC;x++) {
410
      printf("%05d ",dst[x+y*dst_stride]);
411
    }
412
    printf("\n");
413
  }
414
  */
415
0
}
Unexecuted instantiation: void put_epel_hv_fallback<unsigned char>(short*, long, unsigned char const*, long, int, int, int, int, short*, int)
Unexecuted instantiation: void put_epel_hv_fallback<unsigned short>(short*, long, unsigned short const*, long, int, int, int, int, short*, int)
416
417
418
template
419
void put_epel_hv_fallback<uint8_t>(int16_t *dst, ptrdiff_t dst_stride,
420
                                   const uint8_t *src, ptrdiff_t src_stride,
421
                                   int nPbWC, int nPbHC,
422
                                   int xFracC, int yFracC, int16_t* mcbuffer, int bit_depth);
423
template
424
void put_epel_hv_fallback<uint16_t>(int16_t *dst, ptrdiff_t dst_stride,
425
                                    const uint16_t *src, ptrdiff_t src_stride,
426
                                    int nPbWC, int nPbHC,
427
                                    int xFracC, int yFracC, int16_t* mcbuffer, int bit_depth);
428
429
430
431
// Full-sample copy for 8-bit input: every source pixel is written to the
// 16-bit output scaled by a left shift of 6.
//
//   out, out_stride : output samples and row distance
//   src, srcstride  : input pixels and row distance
//   nPbW, nPbH      : block width and height (any width is handled)
//   mcbuffer        : not used by this function
//
// The previous implementation processed four samples per iteration without
// checking the width, so a width that was not a multiple of four read and
// wrote past the end of each row. Copying sample by sample removes that
// overrun; the old code already noted that the unrolling was not faster.
void put_qpel_0_0_fallback(int16_t *out, ptrdiff_t out_stride,
                           const uint8_t *src, ptrdiff_t srcstride,
                           int nPbW, int nPbH, int16_t* mcbuffer)
{
  const int shift2 = 6;

  // straight copy

  for (int y=0;y<nPbH;y++) {
    const uint8_t* p = src + srcstride*y;
    int16_t* o = out + out_stride*y;

    for (int x=0;x<nPbW;x++) {
      o[x] = p[x] << shift2;
    }
  }
}
462
463
464
// Full-sample copy for high-bit-depth input: every source pixel is written to
// the 16-bit output scaled by a left shift that depends on bit_depth.
//
//   out, out_stride : output samples and row distance
//   src, srcstride  : input pixels and row distance
//   nPbW, nPbH      : block width and height
//   mcbuffer        : not used by this function
//   bit_depth       : used to derive the shift
void put_qpel_0_0_fallback_16(int16_t *out, ptrdiff_t out_stride,
                              const uint16_t *src, ptrdiff_t srcstride,
                              int nPbW, int nPbH, int16_t* mcbuffer, int bit_depth)
{
  // HEVC v2 (10/2014), 8.5.3.3.3.2 (luma): shift3 = Max(2, 14 - BitDepth).
  // The Max() came with the Range Extensions (BitDepth up to 16); the v1 (04/2013)
  // formula was plain (14 - BitDepth), which is only valid for BitDepth <= 14.
  int upshift = 14 - bit_depth;
  if (upshift < 2) {
    upshift = 2;
  }

  // straight copy

  for (int row=0; row<nPbH; row++) {
    const uint16_t* srcRow = src + srcstride*row;
    int16_t*        dstRow = out + out_stride*row;

    for (int col=0; col<nPbW; col++) {
      dstRow[col] = srcRow[col] << upshift;
    }
  }
}
486
487
488
489
// Number of additional samples the luma interpolation filters need before
// (left / above) and after (right / below) the block, indexed by the
// fractional position 0..3. These are lookup tables only, hence const.
static const int extra_before[4] = { 0,3,3,2 };
static const int extra_after [4] = { 0,3,4,4 };
491
492
template <class pixel_t>
493
void put_qpel_fallback(int16_t *out, ptrdiff_t out_stride,
494
                       const pixel_t *src, ptrdiff_t srcstride,
495
                       int nPbW, int nPbH, int16_t* mcbuffer,
496
                       int xFracL, int yFracL, int bit_depth)
497
0
{
498
0
  int extra_left   = extra_before[xFracL];
499
  //int extra_right  = extra_after [xFracL];
500
0
  int extra_top    = extra_before[yFracL];
501
0
  int extra_bottom = extra_after [yFracL];
502
503
  //int nPbW_extra = extra_left + nPbW + extra_right;
504
0
  int nPbH_extra = extra_top  + nPbH + extra_bottom;
505
506
0
  const int shift1 = bit_depth-8;
507
0
  const int shift2 = 6;
508
509
510
  // H-filters
511
512
0
  switch (xFracL) {
513
0
  case 0:
514
0
    for (int y=-extra_top;y<nPbH+extra_bottom;y++) {
515
0
      const pixel_t* p = src + srcstride*y - extra_left;
516
0
      int16_t* o = &mcbuffer[y+extra_top];
517
518
0
      for (int x=0;x<nPbW;x++) {
519
0
        *o = *p;
520
0
        o += nPbH_extra;
521
0
        p++;
522
0
      }
523
0
    }
524
0
    break;
525
0
  case 1:
526
0
    for (int y=-extra_top;y<nPbH+extra_bottom;y++) {
527
0
      const pixel_t* p = src + srcstride*y - extra_left;
528
0
      int16_t* o = &mcbuffer[y+extra_top];
529
530
0
      for (int x=0;x<nPbW;x++) {
531
0
        *o = (-p[0]+4*p[1]-10*p[2]+58*p[3]+17*p[4] -5*p[5]  +p[6])>>shift1;
532
0
        o += nPbH_extra;
533
0
        p++;
534
0
      }
535
0
    }
536
0
    break;
537
0
  case 2:
538
0
    for (int y=-extra_top;y<nPbH+extra_bottom;y++) {
539
0
      const pixel_t* p = src + srcstride*y - extra_left;
540
0
      int16_t* o = &mcbuffer[y+extra_top];
541
542
0
      for (int x=0;x<nPbW;x++) {
543
0
        *o = (-p[0]+4*p[1]-11*p[2]+40*p[3]+40*p[4]-11*p[5]+4*p[6]-p[7])>>shift1;
544
0
        o += nPbH_extra;
545
0
        p++;
546
0
      }
547
0
    }
548
0
    break;
549
0
  case 3:
550
0
    for (int y=-extra_top;y<nPbH+extra_bottom;y++) {
551
0
      const pixel_t* p = src + srcstride*y - extra_left;
552
0
      int16_t* o = &mcbuffer[y+extra_top];
553
554
0
      for (int x=0;x<nPbW;x++) {
555
0
        *o = ( p[0]-5*p[1]+17*p[2]+58*p[3]-10*p[4] +4*p[5]  -p[6])>>shift1;
556
0
        o += nPbH_extra;
557
0
        p++;
558
0
      }
559
0
    }
560
0
    break;
561
0
  }
562
563
564
0
  logtrace(LogMotion,"---H---\n");
565
566
0
  for (int y=-extra_top;y<nPbH+extra_bottom;y++) {
567
0
    for (int x=0;x<nPbW;x++) {
568
0
      logtrace(LogMotion,"%04x ",mcbuffer[y+extra_top + x*nPbH_extra]);
569
0
    }
570
0
    logtrace(LogMotion,"\n");
571
0
  }
572
573
  // V-filters
574
575
0
  int vshift = (xFracL==0 ? shift1 : shift2);
576
577
0
  switch (yFracL) {
578
0
  case 0:
579
0
    for (int x=0;x<nPbW;x++) {
580
0
      const int16_t* p = &mcbuffer[x*nPbH_extra];
581
0
      int16_t* o = &out[x];
582
583
0
      for (int y=0;y<nPbH;y++) {
584
0
        *o = *p;
585
0
        o+=out_stride;
586
0
        p++;
587
0
      }
588
0
    }
589
0
    break;
590
0
  case 1:
591
0
    for (int x=0;x<nPbW;x++) {
592
0
      const int16_t* p = &mcbuffer[x*nPbH_extra];
593
0
      int16_t* o = &out[x];
594
595
0
      for (int y=0;y<nPbH;y++) {
596
0
        *o = (-p[0]+4*p[1]-10*p[2]+58*p[3]+17*p[4] -5*p[5]  +p[6])>>vshift;
597
0
        o+=out_stride;
598
0
        p++;
599
0
      }
600
0
    }
601
0
    break;
602
0
  case 2:
603
0
    for (int x=0;x<nPbW;x++) {
604
0
      const int16_t* p = &mcbuffer[x*nPbH_extra];
605
0
      int16_t* o = &out[x];
606
607
0
      for (int y=0;y<nPbH;y++) {
608
0
        *o = (-p[0]+4*p[1]-11*p[2]+40*p[3]+40*p[4]-11*p[5]+4*p[6]-p[7])>>vshift;
609
0
        o+=out_stride;
610
0
        p++;
611
0
      }
612
0
    }
613
0
    break;
614
0
  case 3:
615
0
    for (int x=0;x<nPbW;x++) {
616
0
      const int16_t* p = &mcbuffer[x*nPbH_extra];
617
0
      int16_t* o = &out[x];
618
619
0
      for (int y=0;y<nPbH;y++) {
620
0
        *o = ( p[0]-5*p[1]+17*p[2]+58*p[3]-10*p[4] +4*p[5]  -p[6])>>vshift;
621
0
        o+=out_stride;
622
0
        p++;
623
0
      }
624
0
    }
625
0
    break;
626
0
  }
627
628
629
0
  logtrace(LogMotion,"---V---\n");
630
0
  for (int y=0;y<nPbH;y++) {
631
0
    for (int x=0;x<nPbW;x++) {
632
0
      logtrace(LogMotion,"%04x ",out[x+y*out_stride]);
633
0
    }
634
0
    logtrace(LogMotion,"\n");
635
0
  }
636
0
}
Unexecuted instantiation: void put_qpel_fallback<unsigned char>(short*, long, unsigned char const*, long, int, int, short*, int, int, int)
Unexecuted instantiation: void put_qpel_fallback<unsigned short>(short*, long, unsigned short const*, long, int, int, short*, int, int, int)
637
638
639
640
// Defines put_qpel_<XFRAC>_<YFRAC>_fallback(): an 8-bit wrapper that forwards
// to put_qpel_fallback() with the given fractional position and bit depth 8.
#define QPEL(XFRAC,YFRAC)                                                         \
  void put_qpel_ ## XFRAC ## _ ## YFRAC ## _fallback(int16_t *out, ptrdiff_t out_stride, \
                                                     const uint8_t *src, ptrdiff_t srcstride, \
                                                     int nPbW, int nPbH, int16_t* mcbuffer) \
  {                                                                               \
    put_qpel_fallback(out,out_stride, src,srcstride,                              \
                      nPbW,nPbH,mcbuffer, XFRAC,YFRAC, 8 );                       \
  }
Unexecuted instantiation: put_qpel_0_1_fallback(short*, long, unsigned char const*, long, int, int, short*)
Unexecuted instantiation: put_qpel_0_2_fallback(short*, long, unsigned char const*, long, int, int, short*)
Unexecuted instantiation: put_qpel_0_3_fallback(short*, long, unsigned char const*, long, int, int, short*)
Unexecuted instantiation: put_qpel_1_0_fallback(short*, long, unsigned char const*, long, int, int, short*)
Unexecuted instantiation: put_qpel_1_1_fallback(short*, long, unsigned char const*, long, int, int, short*)
Unexecuted instantiation: put_qpel_1_2_fallback(short*, long, unsigned char const*, long, int, int, short*)
Unexecuted instantiation: put_qpel_1_3_fallback(short*, long, unsigned char const*, long, int, int, short*)
Unexecuted instantiation: put_qpel_2_0_fallback(short*, long, unsigned char const*, long, int, int, short*)
Unexecuted instantiation: put_qpel_2_1_fallback(short*, long, unsigned char const*, long, int, int, short*)
Unexecuted instantiation: put_qpel_2_2_fallback(short*, long, unsigned char const*, long, int, int, short*)
Unexecuted instantiation: put_qpel_2_3_fallback(short*, long, unsigned char const*, long, int, int, short*)
Unexecuted instantiation: put_qpel_3_0_fallback(short*, long, unsigned char const*, long, int, int, short*)
Unexecuted instantiation: put_qpel_3_1_fallback(short*, long, unsigned char const*, long, int, int, short*)
Unexecuted instantiation: put_qpel_3_2_fallback(short*, long, unsigned char const*, long, int, int, short*)
Unexecuted instantiation: put_qpel_3_3_fallback(short*, long, unsigned char const*, long, int, int, short*)
644
645
646
// Defines put_qpel_<XFRAC>_<YFRAC>_fallback_16(): a 16-bit-storage wrapper that
// forwards to put_qpel_fallback() with the given fractional position and the
// caller's bit depth.
#define QPEL16(XFRAC,YFRAC)                                                       \
  void put_qpel_ ## XFRAC ## _ ## YFRAC ## _fallback_16(int16_t *out, ptrdiff_t out_stride, \
                                                        const uint16_t *src, ptrdiff_t srcstride, \
                                                        int nPbW, int nPbH, int16_t* mcbuffer, int bit_depth) \
  {                                                                               \
    put_qpel_fallback(out,out_stride, src,srcstride,                              \
                      nPbW,nPbH,mcbuffer, XFRAC,YFRAC, bit_depth );               \
  }
Unexecuted instantiation: put_qpel_0_1_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int)
Unexecuted instantiation: put_qpel_0_2_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int)
Unexecuted instantiation: put_qpel_0_3_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int)
Unexecuted instantiation: put_qpel_1_0_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int)
Unexecuted instantiation: put_qpel_1_1_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int)
Unexecuted instantiation: put_qpel_1_2_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int)
Unexecuted instantiation: put_qpel_1_3_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int)
Unexecuted instantiation: put_qpel_2_0_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int)
Unexecuted instantiation: put_qpel_2_1_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int)
Unexecuted instantiation: put_qpel_2_2_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int)
Unexecuted instantiation: put_qpel_2_3_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int)
Unexecuted instantiation: put_qpel_3_0_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int)
Unexecuted instantiation: put_qpel_3_1_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int)
Unexecuted instantiation: put_qpel_3_2_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int)
Unexecuted instantiation: put_qpel_3_3_fallback_16(short*, long, unsigned short const*, long, int, int, short*, int)
650
651
// 8-bit wrappers for every fractional position except (0,0),
// which is implemented separately as put_qpel_0_0_fallback().
QPEL(0,1)
QPEL(0,2)
QPEL(0,3)
QPEL(1,0)
QPEL(1,1)
QPEL(1,2)
QPEL(1,3)
QPEL(2,0)
QPEL(2,1)
QPEL(2,2)
QPEL(2,3)
QPEL(3,0)
QPEL(3,1)
QPEL(3,2)
QPEL(3,3)

// 16-bit-storage wrappers for every fractional position except (0,0),
// which is implemented separately as put_qpel_0_0_fallback_16().
QPEL16(0,1)
QPEL16(0,2)
QPEL16(0,3)
QPEL16(1,0)
QPEL16(1,1)
QPEL16(1,2)
QPEL16(1,3)
QPEL16(2,0)
QPEL16(2,1)
QPEL16(2,2)
QPEL16(2,3)
QPEL16(3,0)
QPEL16(3,1)
QPEL16(3,2)
QPEL16(3,3)