Coverage Report

Created: 2026-04-01 07:24

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libde265/libde265/motion.cc
Line
Count
Source
1
/*
2
 * H.265 video codec.
3
 * Copyright (c) 2013-2014 struktur AG, Dirk Farin <farin@struktur.de>
4
 *
5
 * This file is part of libde265.
6
 *
7
 * libde265 is free software: you can redistribute it and/or modify
8
 * it under the terms of the GNU Lesser General Public License as
9
 * published by the Free Software Foundation, either version 3 of
10
 * the License, or (at your option) any later version.
11
 *
12
 * libde265 is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public License
18
 * along with libde265.  If not, see <http://www.gnu.org/licenses/>.
19
 */
20
21
#include "motion.h"
22
#include "decctx.h"
23
#include "util.h"
24
#include "dpb.h"
25
26
#include <assert.h>
27
28
29
#include <sys/types.h>
30
#include <signal.h>
31
#include <string.h>
32
33
#if defined(_MSC_VER) || defined(__MINGW32__)
34
# include <malloc.h>
35
#elif defined(HAVE_ALLOCA_H)
36
# include <alloca.h>
37
#endif
38
39
40
49.0M
#define MAX_CU_SIZE 64
41
42
43
static int extra_before[4] = { 0,3,3,2 };
44
static int extra_after [4] = { 0,3,4,4 };
45
46
47
48
template <class pixel_t>
49
void mc_luma(const base_context* ctx,
50
             const seq_parameter_set* sps, int mv_x, int mv_y,
51
             int xP,int yP,
52
             int16_t* out, int out_stride,
53
             const pixel_t* ref, int ref_stride,
54
             int nPbW, int nPbH, int bitDepth_L)
55
483k
{
56
483k
  int xFracL = mv_x & 3;
57
483k
  int yFracL = mv_y & 3;
58
59
483k
  int xIntOffsL = xP + (mv_x>>2);
60
483k
  int yIntOffsL = yP + (mv_y>>2);
61
62
  // luma sample interpolation process (8.5.3.2.2.1)
63
64
  //const int shift1 = sps->BitDepth_Y-8;
65
  //const int shift2 = 6;
66
483k
  const int shift3 = 14 - sps->BitDepth_Y;
67
68
483k
  int w = sps->pic_width_in_luma_samples;
69
483k
  int h = sps->pic_height_in_luma_samples;
70
71
483k
  ALIGNED_16(int16_t) mcbuffer[MAX_CU_SIZE * (MAX_CU_SIZE+7)];
72
73
483k
  if (xFracL==0 && yFracL==0) {
74
75
225k
    if (xIntOffsL >= 0 && yIntOffsL >= 0 &&
76
223k
        nPbW+xIntOffsL <= w && nPbH+yIntOffsL <= h) {
77
78
220k
      ctx->acceleration.put_hevc_qpel(out, out_stride,
79
220k
                                      &ref[yIntOffsL*ref_stride + xIntOffsL],
80
220k
                                      ref_stride /* sizeof(pixel_t)*/,
81
220k
                                      nPbW,nPbH, mcbuffer, 0,0, bitDepth_L);
82
220k
    }
83
5.10k
    else {
84
53.1k
      for (int y=0;y<nPbH;y++)
85
631k
        for (int x=0;x<nPbW;x++) {
86
87
583k
          int xA = Clip3(0,w-1,x + xIntOffsL);
88
583k
          int yA = Clip3(0,h-1,y + yIntOffsL);
89
90
583k
          out[y*out_stride+x] = ref[ xA + yA*ref_stride ] << shift3;
91
583k
        }
92
5.10k
    }
93
94
#ifdef DE265_LOG_TRACE
95
    logtrace(LogMotion,"---MC luma %d %d = direct---\n",xFracL,yFracL);
96
97
    for (int y=0;y<nPbH;y++) {
98
      for (int x=0;x<nPbW;x++) {
99
100
        int xA = Clip3(0,w-1,x + xIntOffsL);
101
        int yA = Clip3(0,h-1,y + yIntOffsL);
102
103
        logtrace(LogMotion,"%02x ", ref[ xA + yA*ref_stride ]);
104
      }
105
      logtrace(LogMotion,"\n");
106
    }
107
108
    logtrace(LogMotion," -> \n");
109
110
    for (int y=0;y<nPbH;y++) {
111
      for (int x=0;x<nPbW;x++) {
112
113
        logtrace(LogMotion,"%02x ",out[y*out_stride+x] >> 6); // 6 will be used when summing predictions
114
      }
115
      logtrace(LogMotion,"\n");
116
    }
117
#endif
118
225k
  }
119
257k
  else {
120
257k
    int extra_left   = extra_before[xFracL];
121
257k
    int extra_right  = extra_after [xFracL];
122
257k
    int extra_top    = extra_before[yFracL];
123
257k
    int extra_bottom = extra_after [yFracL];
124
125
    //int nPbW_extra = extra_left + nPbW + extra_right;
126
    //int nPbH_extra = extra_top  + nPbH + extra_bottom;
127
128
129
257k
    pixel_t padbuf[(MAX_CU_SIZE+16)*(MAX_CU_SIZE+7)];
130
131
257k
    const pixel_t* src_ptr;
132
257k
    int src_stride;
133
134
257k
    if (-extra_left + xIntOffsL >= 0 &&
135
243k
        -extra_top  + yIntOffsL >= 0 &&
136
228k
        nPbW+extra_right  + xIntOffsL < w &&
137
205k
        nPbH+extra_bottom + yIntOffsL < h) {
138
187k
      src_ptr = &ref[xIntOffsL + yIntOffsL*ref_stride];
139
187k
      src_stride = ref_stride;
140
187k
    }
141
69.7k
    else {
142
      // Extend fill width to a multiple of 16 so that SIMD over-reads
143
      // in qpel interpolation hit valid (edge-clamped) data.
144
69.7k
      int fill_width = ((extra_left + nPbW + extra_right + 15) & ~15);
145
69.7k
      if (fill_width > MAX_CU_SIZE+16) fill_width = MAX_CU_SIZE+16;
146
147
1.09M
      for (int y=-extra_top;y<nPbH+extra_bottom;y++) {
148
24.6M
        for (int x=-extra_left;x<fill_width - extra_left;x++) {
149
150
23.5M
          int xA = Clip3(0,w-1,x + xIntOffsL);
151
23.5M
          int yA = Clip3(0,h-1,y + yIntOffsL);
152
153
23.5M
          padbuf[x+extra_left + (y+extra_top)*(MAX_CU_SIZE+16)] = ref[ xA + yA*ref_stride ];
154
23.5M
        }
155
1.02M
      }
156
157
69.7k
      src_ptr = &padbuf[extra_top*(MAX_CU_SIZE+16) + extra_left];
158
69.7k
      src_stride = MAX_CU_SIZE+16;
159
69.7k
    }
160
161
257k
    ctx->acceleration.put_hevc_qpel(out, out_stride,
162
257k
                                    src_ptr, src_stride /* sizeof(pixel_t) */,
163
257k
                                    nPbW,nPbH, mcbuffer, xFracL,yFracL, bitDepth_L);
164
165
166
257k
    logtrace(LogMotion,"---V---\n");
167
2.59M
    for (int y=0;y<nPbH;y++) {
168
28.0M
      for (int x=0;x<nPbW;x++) {
169
25.6M
        logtrace(LogMotion,"%04x ",out[x+y*out_stride]);
170
25.6M
      }
171
2.33M
      logtrace(LogMotion,"\n");
172
2.33M
    }
173
257k
  }
174
483k
}
void mc_luma<unsigned short>(base_context const*, seq_parameter_set const*, int, int, int, int, short*, int, unsigned short const*, int, int, int, int)
Line
Count
Source
55
137k
{
56
137k
  int xFracL = mv_x & 3;
57
137k
  int yFracL = mv_y & 3;
58
59
137k
  int xIntOffsL = xP + (mv_x>>2);
60
137k
  int yIntOffsL = yP + (mv_y>>2);
61
62
  // luma sample interpolation process (8.5.3.2.2.1)
63
64
  //const int shift1 = sps->BitDepth_Y-8;
65
  //const int shift2 = 6;
66
137k
  const int shift3 = 14 - sps->BitDepth_Y;
67
68
137k
  int w = sps->pic_width_in_luma_samples;
69
137k
  int h = sps->pic_height_in_luma_samples;
70
71
137k
  ALIGNED_16(int16_t) mcbuffer[MAX_CU_SIZE * (MAX_CU_SIZE+7)];
72
73
137k
  if (xFracL==0 && yFracL==0) {
74
75
79.0k
    if (xIntOffsL >= 0 && yIntOffsL >= 0 &&
76
78.2k
        nPbW+xIntOffsL <= w && nPbH+yIntOffsL <= h) {
77
78
77.0k
      ctx->acceleration.put_hevc_qpel(out, out_stride,
79
77.0k
                                      &ref[yIntOffsL*ref_stride + xIntOffsL],
80
77.0k
                                      ref_stride /* sizeof(pixel_t)*/,
81
77.0k
                                      nPbW,nPbH, mcbuffer, 0,0, bitDepth_L);
82
77.0k
    }
83
1.98k
    else {
84
20.3k
      for (int y=0;y<nPbH;y++)
85
228k
        for (int x=0;x<nPbW;x++) {
86
87
209k
          int xA = Clip3(0,w-1,x + xIntOffsL);
88
209k
          int yA = Clip3(0,h-1,y + yIntOffsL);
89
90
209k
          out[y*out_stride+x] = ref[ xA + yA*ref_stride ] << shift3;
91
209k
        }
92
1.98k
    }
93
94
#ifdef DE265_LOG_TRACE
95
    logtrace(LogMotion,"---MC luma %d %d = direct---\n",xFracL,yFracL);
96
97
    for (int y=0;y<nPbH;y++) {
98
      for (int x=0;x<nPbW;x++) {
99
100
        int xA = Clip3(0,w-1,x + xIntOffsL);
101
        int yA = Clip3(0,h-1,y + yIntOffsL);
102
103
        logtrace(LogMotion,"%02x ", ref[ xA + yA*ref_stride ]);
104
      }
105
      logtrace(LogMotion,"\n");
106
    }
107
108
    logtrace(LogMotion," -> \n");
109
110
    for (int y=0;y<nPbH;y++) {
111
      for (int x=0;x<nPbW;x++) {
112
113
        logtrace(LogMotion,"%02x ",out[y*out_stride+x] >> 6); // 6 will be used when summing predictions
114
      }
115
      logtrace(LogMotion,"\n");
116
    }
117
#endif
118
79.0k
  }
119
58.1k
  else {
120
58.1k
    int extra_left   = extra_before[xFracL];
121
58.1k
    int extra_right  = extra_after [xFracL];
122
58.1k
    int extra_top    = extra_before[yFracL];
123
58.1k
    int extra_bottom = extra_after [yFracL];
124
125
    //int nPbW_extra = extra_left + nPbW + extra_right;
126
    //int nPbH_extra = extra_top  + nPbH + extra_bottom;
127
128
129
58.1k
    pixel_t padbuf[(MAX_CU_SIZE+16)*(MAX_CU_SIZE+7)];
130
131
58.1k
    const pixel_t* src_ptr;
132
58.1k
    int src_stride;
133
134
58.1k
    if (-extra_left + xIntOffsL >= 0 &&
135
55.9k
        -extra_top  + yIntOffsL >= 0 &&
136
46.5k
        nPbW+extra_right  + xIntOffsL < w &&
137
41.8k
        nPbH+extra_bottom + yIntOffsL < h) {
138
32.7k
      src_ptr = &ref[xIntOffsL + yIntOffsL*ref_stride];
139
32.7k
      src_stride = ref_stride;
140
32.7k
    }
141
25.3k
    else {
142
      // Extend fill width to a multiple of 16 so that SIMD over-reads
143
      // in qpel interpolation hit valid (edge-clamped) data.
144
25.3k
      int fill_width = ((extra_left + nPbW + extra_right + 15) & ~15);
145
25.3k
      if (fill_width > MAX_CU_SIZE+16) fill_width = MAX_CU_SIZE+16;
146
147
385k
      for (int y=-extra_top;y<nPbH+extra_bottom;y++) {
148
8.75M
        for (int x=-extra_left;x<fill_width - extra_left;x++) {
149
150
8.39M
          int xA = Clip3(0,w-1,x + xIntOffsL);
151
8.39M
          int yA = Clip3(0,h-1,y + yIntOffsL);
152
153
8.39M
          padbuf[x+extra_left + (y+extra_top)*(MAX_CU_SIZE+16)] = ref[ xA + yA*ref_stride ];
154
8.39M
        }
155
359k
      }
156
157
25.3k
      src_ptr = &padbuf[extra_top*(MAX_CU_SIZE+16) + extra_left];
158
25.3k
      src_stride = MAX_CU_SIZE+16;
159
25.3k
    }
160
161
58.1k
    ctx->acceleration.put_hevc_qpel(out, out_stride,
162
58.1k
                                    src_ptr, src_stride /* sizeof(pixel_t) */,
163
58.1k
                                    nPbW,nPbH, mcbuffer, xFracL,yFracL, bitDepth_L);
164
165
166
58.1k
    logtrace(LogMotion,"---V---\n");
167
614k
    for (int y=0;y<nPbH;y++) {
168
8.05M
      for (int x=0;x<nPbW;x++) {
169
7.50M
        logtrace(LogMotion,"%04x ",out[x+y*out_stride]);
170
7.50M
      }
171
556k
      logtrace(LogMotion,"\n");
172
556k
    }
173
58.1k
  }
174
137k
}
void mc_luma<unsigned char>(base_context const*, seq_parameter_set const*, int, int, int, int, short*, int, unsigned char const*, int, int, int, int)
Line
Count
Source
55
346k
{
56
346k
  int xFracL = mv_x & 3;
57
346k
  int yFracL = mv_y & 3;
58
59
346k
  int xIntOffsL = xP + (mv_x>>2);
60
346k
  int yIntOffsL = yP + (mv_y>>2);
61
62
  // luma sample interpolation process (8.5.3.2.2.1)
63
64
  //const int shift1 = sps->BitDepth_Y-8;
65
  //const int shift2 = 6;
66
346k
  const int shift3 = 14 - sps->BitDepth_Y;
67
68
346k
  int w = sps->pic_width_in_luma_samples;
69
346k
  int h = sps->pic_height_in_luma_samples;
70
71
346k
  ALIGNED_16(int16_t) mcbuffer[MAX_CU_SIZE * (MAX_CU_SIZE+7)];
72
73
346k
  if (xFracL==0 && yFracL==0) {
74
75
146k
    if (xIntOffsL >= 0 && yIntOffsL >= 0 &&
76
145k
        nPbW+xIntOffsL <= w && nPbH+yIntOffsL <= h) {
77
78
143k
      ctx->acceleration.put_hevc_qpel(out, out_stride,
79
143k
                                      &ref[yIntOffsL*ref_stride + xIntOffsL],
80
143k
                                      ref_stride /* sizeof(pixel_t)*/,
81
143k
                                      nPbW,nPbH, mcbuffer, 0,0, bitDepth_L);
82
143k
    }
83
3.11k
    else {
84
32.7k
      for (int y=0;y<nPbH;y++)
85
403k
        for (int x=0;x<nPbW;x++) {
86
87
373k
          int xA = Clip3(0,w-1,x + xIntOffsL);
88
373k
          int yA = Clip3(0,h-1,y + yIntOffsL);
89
90
373k
          out[y*out_stride+x] = ref[ xA + yA*ref_stride ] << shift3;
91
373k
        }
92
3.11k
    }
93
94
#ifdef DE265_LOG_TRACE
95
    logtrace(LogMotion,"---MC luma %d %d = direct---\n",xFracL,yFracL);
96
97
    for (int y=0;y<nPbH;y++) {
98
      for (int x=0;x<nPbW;x++) {
99
100
        int xA = Clip3(0,w-1,x + xIntOffsL);
101
        int yA = Clip3(0,h-1,y + yIntOffsL);
102
103
        logtrace(LogMotion,"%02x ", ref[ xA + yA*ref_stride ]);
104
      }
105
      logtrace(LogMotion,"\n");
106
    }
107
108
    logtrace(LogMotion," -> \n");
109
110
    for (int y=0;y<nPbH;y++) {
111
      for (int x=0;x<nPbW;x++) {
112
113
        logtrace(LogMotion,"%02x ",out[y*out_stride+x] >> 6); // 6 will be used when summing predictions
114
      }
115
      logtrace(LogMotion,"\n");
116
    }
117
#endif
118
146k
  }
119
199k
  else {
120
199k
    int extra_left   = extra_before[xFracL];
121
199k
    int extra_right  = extra_after [xFracL];
122
199k
    int extra_top    = extra_before[yFracL];
123
199k
    int extra_bottom = extra_after [yFracL];
124
125
    //int nPbW_extra = extra_left + nPbW + extra_right;
126
    //int nPbH_extra = extra_top  + nPbH + extra_bottom;
127
128
129
199k
    pixel_t padbuf[(MAX_CU_SIZE+16)*(MAX_CU_SIZE+7)];
130
131
199k
    const pixel_t* src_ptr;
132
199k
    int src_stride;
133
134
199k
    if (-extra_left + xIntOffsL >= 0 &&
135
187k
        -extra_top  + yIntOffsL >= 0 &&
136
181k
        nPbW+extra_right  + xIntOffsL < w &&
137
163k
        nPbH+extra_bottom + yIntOffsL < h) {
138
155k
      src_ptr = &ref[xIntOffsL + yIntOffsL*ref_stride];
139
155k
      src_stride = ref_stride;
140
155k
    }
141
44.3k
    else {
142
      // Extend fill width to a multiple of 16 so that SIMD over-reads
143
      // in qpel interpolation hit valid (edge-clamped) data.
144
44.3k
      int fill_width = ((extra_left + nPbW + extra_right + 15) & ~15);
145
44.3k
      if (fill_width > MAX_CU_SIZE+16) fill_width = MAX_CU_SIZE+16;
146
147
707k
      for (int y=-extra_top;y<nPbH+extra_bottom;y++) {
148
15.8M
        for (int x=-extra_left;x<fill_width - extra_left;x++) {
149
150
15.1M
          int xA = Clip3(0,w-1,x + xIntOffsL);
151
15.1M
          int yA = Clip3(0,h-1,y + yIntOffsL);
152
153
15.1M
          padbuf[x+extra_left + (y+extra_top)*(MAX_CU_SIZE+16)] = ref[ xA + yA*ref_stride ];
154
15.1M
        }
155
663k
      }
156
157
44.3k
      src_ptr = &padbuf[extra_top*(MAX_CU_SIZE+16) + extra_left];
158
44.3k
      src_stride = MAX_CU_SIZE+16;
159
44.3k
    }
160
161
199k
    ctx->acceleration.put_hevc_qpel(out, out_stride,
162
199k
                                    src_ptr, src_stride /* sizeof(pixel_t) */,
163
199k
                                    nPbW,nPbH, mcbuffer, xFracL,yFracL, bitDepth_L);
164
165
166
199k
    logtrace(LogMotion,"---V---\n");
167
1.97M
    for (int y=0;y<nPbH;y++) {
168
19.9M
      for (int x=0;x<nPbW;x++) {
169
18.1M
        logtrace(LogMotion,"%04x ",out[x+y*out_stride]);
170
18.1M
      }
171
1.77M
      logtrace(LogMotion,"\n");
172
1.77M
    }
173
199k
  }
174
346k
}
175
176
177
178
template <class pixel_t>
179
void mc_chroma(const base_context* ctx,
180
               const seq_parameter_set* sps,
181
               int mv_x, int mv_y,
182
               int xP,int yP,
183
               int16_t* out, int out_stride,
184
               const pixel_t* ref, int ref_stride,
185
               int nPbWC, int nPbHC, int bit_depth_C)
186
402k
{
187
  // chroma sample interpolation process (8.5.3.2.2.2)
188
189
  //const int shift1 = sps->BitDepth_C-8;
190
  //const int shift2 = 6;
191
402k
  const int shift3 = 14 - sps->BitDepth_C;
192
193
402k
  int wC = sps->pic_width_in_luma_samples /sps->SubWidthC;
194
402k
  int hC = sps->pic_height_in_luma_samples/sps->SubHeightC;
195
196
402k
  mv_x *= 2 / sps->SubWidthC;
197
402k
  mv_y *= 2 / sps->SubHeightC;
198
199
402k
  int xFracC = mv_x & 7;
200
402k
  int yFracC = mv_y & 7;
201
202
402k
  int xIntOffsC = xP/sps->SubWidthC  + (mv_x>>3);
203
402k
  int yIntOffsC = yP/sps->SubHeightC + (mv_y>>3);
204
205
402k
  ALIGNED_32(int16_t mcbuffer[MAX_CU_SIZE*(MAX_CU_SIZE+7)]);
206
207
402k
  if (xFracC == 0 && yFracC == 0) {
208
215k
    if (xIntOffsC>=0 && nPbWC+xIntOffsC<=wC &&
209
213k
        yIntOffsC>=0 && nPbHC+yIntOffsC<=hC) {
210
210k
      ctx->acceleration.put_hevc_epel(out, out_stride,
211
210k
                                      &ref[xIntOffsC + yIntOffsC*ref_stride], ref_stride,
212
210k
                                      nPbWC,nPbHC, 0,0, nullptr, bit_depth_C);
213
210k
    }
214
5.09k
    else
215
5.09k
      {
216
57.4k
        for (int y=0;y<nPbHC;y++)
217
848k
          for (int x=0;x<nPbWC;x++) {
218
219
796k
            int xB = Clip3(0,wC-1,x + xIntOffsC);
220
796k
            int yB = Clip3(0,hC-1,y + yIntOffsC);
221
222
796k
            out[y*out_stride+x] = ref[ xB + yB*ref_stride ] << shift3;
223
796k
          }
224
5.09k
      }
225
215k
  }
226
186k
  else {
227
186k
    pixel_t padbuf[(MAX_CU_SIZE+16)*(MAX_CU_SIZE+3)];
228
229
186k
    const pixel_t* src_ptr;
230
186k
    int src_stride;
231
232
186k
    int extra_top  = 1;
233
186k
    int extra_left = 1;
234
186k
    int extra_right  = 2;
235
186k
    int extra_bottom = 2;
236
237
186k
    if (xIntOffsC>=1 && nPbWC+xIntOffsC<=wC-2 &&
238
156k
        yIntOffsC>=1 && nPbHC+yIntOffsC<=hC-2) {
239
135k
      src_ptr = &ref[xIntOffsC + yIntOffsC*ref_stride];
240
135k
      src_stride = ref_stride;
241
135k
    }
242
51.3k
    else {
243
      // Extend fill width to a multiple of 16 so that SIMD over-reads
244
      // in epel interpolation hit valid (edge-clamped) data.
245
51.3k
      int fill_width = ((extra_left + nPbWC + extra_right + 15) & ~15);
246
51.3k
      if (fill_width > MAX_CU_SIZE+16) fill_width = MAX_CU_SIZE+16;
247
248
873k
      for (int y=-extra_top;y<nPbHC+extra_bottom;y++) {
249
25.9M
        for (int x=-extra_left;x<fill_width - extra_left;x++) {
250
251
25.1M
          int xA = Clip3(0,wC-1,x + xIntOffsC);
252
25.1M
          int yA = Clip3(0,hC-1,y + yIntOffsC);
253
254
25.1M
          padbuf[x+extra_left + (y+extra_top)*(MAX_CU_SIZE+16)] = ref[ xA + yA*ref_stride ];
255
25.1M
        }
256
821k
      }
257
258
51.3k
      src_ptr = &padbuf[extra_left + extra_top*(MAX_CU_SIZE+16)];
259
51.3k
      src_stride = MAX_CU_SIZE+16;
260
51.3k
    }
261
262
263
186k
    if (xFracC && yFracC) {
264
96.7k
      ctx->acceleration.put_hevc_epel_hv(out, out_stride,
265
96.7k
                                         src_ptr, src_stride,
266
96.7k
                                         nPbWC,nPbHC, xFracC,yFracC, mcbuffer, bit_depth_C);
267
96.7k
    }
268
89.8k
    else if (xFracC) {
269
46.2k
      ctx->acceleration.put_hevc_epel_h(out, out_stride,
270
46.2k
                                        src_ptr, src_stride,
271
46.2k
                                        nPbWC,nPbHC, xFracC,yFracC, mcbuffer, bit_depth_C);
272
46.2k
    }
273
43.6k
    else if (yFracC) {
274
43.6k
      ctx->acceleration.put_hevc_epel_v(out, out_stride,
275
43.6k
                                        src_ptr, src_stride,
276
43.6k
                                        nPbWC,nPbHC, xFracC,yFracC, mcbuffer, bit_depth_C);
277
43.6k
    }
278
0
    else {
279
0
      assert(false); // full-pel shifts are handled above
280
0
    }
281
186k
  }
282
402k
}
void mc_chroma<unsigned short>(base_context const*, seq_parameter_set const*, int, int, int, int, short*, int, unsigned short const*, int, int, int, int)
Line
Count
Source
186
108k
{
187
  // chroma sample interpolation process (8.5.3.2.2.2)
188
189
  //const int shift1 = sps->BitDepth_C-8;
190
  //const int shift2 = 6;
191
108k
  const int shift3 = 14 - sps->BitDepth_C;
192
193
108k
  int wC = sps->pic_width_in_luma_samples /sps->SubWidthC;
194
108k
  int hC = sps->pic_height_in_luma_samples/sps->SubHeightC;
195
196
108k
  mv_x *= 2 / sps->SubWidthC;
197
108k
  mv_y *= 2 / sps->SubHeightC;
198
199
108k
  int xFracC = mv_x & 7;
200
108k
  int yFracC = mv_y & 7;
201
202
108k
  int xIntOffsC = xP/sps->SubWidthC  + (mv_x>>3);
203
108k
  int yIntOffsC = yP/sps->SubHeightC + (mv_y>>3);
204
205
108k
  ALIGNED_32(int16_t mcbuffer[MAX_CU_SIZE*(MAX_CU_SIZE+7)]);
206
207
108k
  if (xFracC == 0 && yFracC == 0) {
208
54.7k
    if (xIntOffsC>=0 && nPbWC+xIntOffsC<=wC &&
209
53.7k
        yIntOffsC>=0 && nPbHC+yIntOffsC<=hC) {
210
52.9k
      ctx->acceleration.put_hevc_epel(out, out_stride,
211
52.9k
                                      &ref[xIntOffsC + yIntOffsC*ref_stride], ref_stride,
212
52.9k
                                      nPbWC,nPbHC, 0,0, nullptr, bit_depth_C);
213
52.9k
    }
214
1.87k
    else
215
1.87k
      {
216
21.2k
        for (int y=0;y<nPbHC;y++)
217
305k
          for (int x=0;x<nPbWC;x++) {
218
219
285k
            int xB = Clip3(0,wC-1,x + xIntOffsC);
220
285k
            int yB = Clip3(0,hC-1,y + yIntOffsC);
221
222
285k
            out[y*out_stride+x] = ref[ xB + yB*ref_stride ] << shift3;
223
285k
          }
224
1.87k
      }
225
54.7k
  }
226
53.5k
  else {
227
53.5k
    pixel_t padbuf[(MAX_CU_SIZE+16)*(MAX_CU_SIZE+3)];
228
229
53.5k
    const pixel_t* src_ptr;
230
53.5k
    int src_stride;
231
232
53.5k
    int extra_top  = 1;
233
53.5k
    int extra_left = 1;
234
53.5k
    int extra_right  = 2;
235
53.5k
    int extra_bottom = 2;
236
237
53.5k
    if (xIntOffsC>=1 && nPbWC+xIntOffsC<=wC-2 &&
238
43.6k
        yIntOffsC>=1 && nPbHC+yIntOffsC<=hC-2) {
239
36.5k
      src_ptr = &ref[xIntOffsC + yIntOffsC*ref_stride];
240
36.5k
      src_stride = ref_stride;
241
36.5k
    }
242
17.0k
    else {
243
      // Extend fill width to a multiple of 16 so that SIMD over-reads
244
      // in epel interpolation hit valid (edge-clamped) data.
245
17.0k
      int fill_width = ((extra_left + nPbWC + extra_right + 15) & ~15);
246
17.0k
      if (fill_width > MAX_CU_SIZE+16) fill_width = MAX_CU_SIZE+16;
247
248
291k
      for (int y=-extra_top;y<nPbHC+extra_bottom;y++) {
249
9.28M
        for (int x=-extra_left;x<fill_width - extra_left;x++) {
250
251
9.01M
          int xA = Clip3(0,wC-1,x + xIntOffsC);
252
9.01M
          int yA = Clip3(0,hC-1,y + yIntOffsC);
253
254
9.01M
          padbuf[x+extra_left + (y+extra_top)*(MAX_CU_SIZE+16)] = ref[ xA + yA*ref_stride ];
255
9.01M
        }
256
274k
      }
257
258
17.0k
      src_ptr = &padbuf[extra_left + extra_top*(MAX_CU_SIZE+16)];
259
17.0k
      src_stride = MAX_CU_SIZE+16;
260
17.0k
    }
261
262
263
53.5k
    if (xFracC && yFracC) {
264
26.2k
      ctx->acceleration.put_hevc_epel_hv(out, out_stride,
265
26.2k
                                         src_ptr, src_stride,
266
26.2k
                                         nPbWC,nPbHC, xFracC,yFracC, mcbuffer, bit_depth_C);
267
26.2k
    }
268
27.3k
    else if (xFracC) {
269
15.2k
      ctx->acceleration.put_hevc_epel_h(out, out_stride,
270
15.2k
                                        src_ptr, src_stride,
271
15.2k
                                        nPbWC,nPbHC, xFracC,yFracC, mcbuffer, bit_depth_C);
272
15.2k
    }
273
12.1k
    else if (yFracC) {
274
12.1k
      ctx->acceleration.put_hevc_epel_v(out, out_stride,
275
12.1k
                                        src_ptr, src_stride,
276
12.1k
                                        nPbWC,nPbHC, xFracC,yFracC, mcbuffer, bit_depth_C);
277
12.1k
    }
278
0
    else {
279
      assert(false); // full-pel shifts are handled above
280
0
    }
281
53.5k
  }
282
108k
}
void mc_chroma<unsigned char>(base_context const*, seq_parameter_set const*, int, int, int, int, short*, int, unsigned char const*, int, int, int, int)
Line
Count
Source
186
294k
{
187
  // chroma sample interpolation process (8.5.3.2.2.2)
188
189
  //const int shift1 = sps->BitDepth_C-8;
190
  //const int shift2 = 6;
191
294k
  const int shift3 = 14 - sps->BitDepth_C;
192
193
294k
  int wC = sps->pic_width_in_luma_samples /sps->SubWidthC;
194
294k
  int hC = sps->pic_height_in_luma_samples/sps->SubHeightC;
195
196
294k
  mv_x *= 2 / sps->SubWidthC;
197
294k
  mv_y *= 2 / sps->SubHeightC;
198
199
294k
  int xFracC = mv_x & 7;
200
294k
  int yFracC = mv_y & 7;
201
202
294k
  int xIntOffsC = xP/sps->SubWidthC  + (mv_x>>3);
203
294k
  int yIntOffsC = yP/sps->SubHeightC + (mv_y>>3);
204
205
294k
  ALIGNED_32(int16_t mcbuffer[MAX_CU_SIZE*(MAX_CU_SIZE+7)]);
206
207
294k
  if (xFracC == 0 && yFracC == 0) {
208
160k
    if (xIntOffsC>=0 && nPbWC+xIntOffsC<=wC &&
209
159k
        yIntOffsC>=0 && nPbHC+yIntOffsC<=hC) {
210
157k
      ctx->acceleration.put_hevc_epel(out, out_stride,
211
157k
                                      &ref[xIntOffsC + yIntOffsC*ref_stride], ref_stride,
212
157k
                                      nPbWC,nPbHC, 0,0, nullptr, bit_depth_C);
213
157k
    }
214
3.22k
    else
215
3.22k
      {
216
36.1k
        for (int y=0;y<nPbHC;y++)
217
543k
          for (int x=0;x<nPbWC;x++) {
218
219
510k
            int xB = Clip3(0,wC-1,x + xIntOffsC);
220
510k
            int yB = Clip3(0,hC-1,y + yIntOffsC);
221
222
510k
            out[y*out_stride+x] = ref[ xB + yB*ref_stride ] << shift3;
223
510k
          }
224
3.22k
      }
225
160k
  }
226
133k
  else {
227
133k
    pixel_t padbuf[(MAX_CU_SIZE+16)*(MAX_CU_SIZE+3)];
228
229
133k
    const pixel_t* src_ptr;
230
133k
    int src_stride;
231
232
133k
    int extra_top  = 1;
233
133k
    int extra_left = 1;
234
133k
    int extra_right  = 2;
235
133k
    int extra_bottom = 2;
236
237
133k
    if (xIntOffsC>=1 && nPbWC+xIntOffsC<=wC-2 &&
238
113k
        yIntOffsC>=1 && nPbHC+yIntOffsC<=hC-2) {
239
98.7k
      src_ptr = &ref[xIntOffsC + yIntOffsC*ref_stride];
240
98.7k
      src_stride = ref_stride;
241
98.7k
    }
242
34.3k
    else {
243
      // Extend fill width to a multiple of 16 so that SIMD over-reads
244
      // in epel interpolation hit valid (edge-clamped) data.
245
34.3k
      int fill_width = ((extra_left + nPbWC + extra_right + 15) & ~15);
246
34.3k
      if (fill_width > MAX_CU_SIZE+16) fill_width = MAX_CU_SIZE+16;
247
248
581k
      for (int y=-extra_top;y<nPbHC+extra_bottom;y++) {
249
16.6M
        for (int x=-extra_left;x<fill_width - extra_left;x++) {
250
251
16.1M
          int xA = Clip3(0,wC-1,x + xIntOffsC);
252
16.1M
          int yA = Clip3(0,hC-1,y + yIntOffsC);
253
254
16.1M
          padbuf[x+extra_left + (y+extra_top)*(MAX_CU_SIZE+16)] = ref[ xA + yA*ref_stride ];
255
16.1M
        }
256
547k
      }
257
258
34.3k
      src_ptr = &padbuf[extra_left + extra_top*(MAX_CU_SIZE+16)];
259
34.3k
      src_stride = MAX_CU_SIZE+16;
260
34.3k
    }
261
262
263
133k
    if (xFracC && yFracC) {
264
70.5k
      ctx->acceleration.put_hevc_epel_hv(out, out_stride,
265
70.5k
                                         src_ptr, src_stride,
266
70.5k
                                         nPbWC,nPbHC, xFracC,yFracC, mcbuffer, bit_depth_C);
267
70.5k
    }
268
62.5k
    else if (xFracC) {
269
31.0k
      ctx->acceleration.put_hevc_epel_h(out, out_stride,
270
31.0k
                                        src_ptr, src_stride,
271
31.0k
                                        nPbWC,nPbHC, xFracC,yFracC, mcbuffer, bit_depth_C);
272
31.0k
    }
273
31.5k
    else if (yFracC) {
274
31.5k
      ctx->acceleration.put_hevc_epel_v(out, out_stride,
275
31.5k
                                        src_ptr, src_stride,
276
31.5k
                                        nPbWC,nPbHC, xFracC,yFracC, mcbuffer, bit_depth_C);
277
31.5k
    }
278
0
    else {
279
      assert(false); // full-pel shifts are handled above
280
0
    }
281
133k
  }
282
294k
}
283
284
285
286
// 8.5.3.2
287
// NOTE: for full-pel shifts, we can introduce a fast path, simply copying without shifts
288
void generate_inter_prediction_samples(base_context* ctx,
289
                                       const slice_segment_header* shdr,
290
                                       de265_image* img,
291
                                       int xC,int yC,
292
                                       int xB,int yB,
293
                                       int nCS, int nPbW,int nPbH,
294
                                       const PBMotion* vi)
295
427k
{
296
427k
  int xP = xC+xB;
297
427k
  int yP = yC+yB;
298
299
427k
  void*  pixels[3];
300
427k
  int    stride[3];
301
302
427k
  const pic_parameter_set* pps = shdr->pps.get();
303
427k
  const seq_parameter_set* sps = pps->sps.get();
304
305
427k
  if (sps->BitDepth_Y != img->get_bit_depth(0) ||
306
427k
      sps->BitDepth_C != img->get_bit_depth(1)) {
307
0
    img->integrity = INTEGRITY_DECODING_ERRORS;
308
0
    ctx->add_warning(DE265_WARNING_BIT_DEPTH_OF_CURRENT_IMAGE_DOES_NOT_MATCH_SPS, false);
309
0
    return;
310
0
  }
311
312
427k
  if (sps->chroma_format_idc != img->get_chroma_format()) {
313
0
    img->integrity = INTEGRITY_DECODING_ERRORS;
314
0
    ctx->add_warning(DE265_WARNING_CHROMA_OF_CURRENT_IMAGE_DOES_NOT_MATCH_SPS, false);
315
0
    return;
316
0
  }
317
318
427k
  const int SubWidthC  = sps->SubWidthC;
319
427k
  const int SubHeightC = sps->SubHeightC;
320
321
427k
  pixels[0] = img->get_image_plane_at_pos_any_depth(0,xP,yP);
322
427k
  stride[0] = img->get_image_stride(0);
323
324
427k
  pixels[1] = img->get_image_plane_at_pos_any_depth(1,xP/SubWidthC,yP/SubHeightC);
325
427k
  stride[1] = img->get_image_stride(1);
326
327
427k
  pixels[2] = img->get_image_plane_at_pos_any_depth(2,xP/SubWidthC,yP/SubHeightC);
328
427k
  stride[2] = img->get_image_stride(2);
329
330
331
427k
  ALIGNED_16(int16_t) predSamplesL                 [2 /* LX */][MAX_CU_SIZE* MAX_CU_SIZE];
332
427k
  ALIGNED_16(int16_t) predSamplesC[2 /* chroma */ ][2 /* LX */][MAX_CU_SIZE* MAX_CU_SIZE];
333
334
  //int xP = xC+xB;
335
  //int yP = yC+yB;
336
337
427k
  int predFlag[2];
338
427k
  predFlag[0] = vi->predFlag[0];
339
427k
  predFlag[1] = vi->predFlag[1];
340
341
427k
  const int bit_depth_L = sps->BitDepth_Y;
342
427k
  const int bit_depth_C = sps->BitDepth_C;
343
344
  // Some encoders use bi-prediction with two similar MVs.
345
  // Identify this case and use only one MV.
346
347
  // do this only without weighted prediction, because the weights/offsets may be different
348
427k
  if (pps->weighted_pred_flag==0) {
349
378k
    if (predFlag[0] && predFlag[1]) {
350
113k
      if (vi->mv[0].x == vi->mv[1].x &&
351
75.9k
          vi->mv[0].y == vi->mv[1].y &&
352
66.7k
          shdr->RefPicList[0][vi->refIdx[0]] ==
353
66.7k
          shdr->RefPicList[1][vi->refIdx[1]]) {
354
62.7k
        predFlag[1] = 0;
355
62.7k
      }
356
113k
    }
357
378k
  }
358
359
360
  // Fill prediction samples with mid-grey in intermediate precision.
361
  // Used on error paths where the reference picture is unavailable or mismatched.
362
427k
  auto fill_pred_samples = [&](int l) {
363
0
    const int16_t fill = 1 << 13; // mid-grey: (1 << (bd-1)) << (14-bd) for any bd
364
0
    for (int y = 0; y < nPbH; y++)
365
0
      for (int x = 0; x < nPbW; x++)
366
0
        predSamplesL[l][y * nCS + x] = fill;
367
0
    if (img->get_chroma_format() != de265_chroma_mono) {
368
0
      int cW = nPbW / SubWidthC;
369
0
      int cH = nPbH / SubHeightC;
370
0
      for (int y = 0; y < cH; y++)
371
0
        for (int x = 0; x < cW; x++) {
372
0
          predSamplesC[0][l][y * nCS + x] = fill;
373
0
          predSamplesC[1][l][y * nCS + x] = fill;
374
0
        }
375
0
    }
376
0
  };
377
378
1.28M
  for (int l=0;l<2;l++) {
379
854k
    if (predFlag[l]) {
380
      // 8.5.3.2.1
381
382
483k
      const de265_image* refPic = ctx->get_image(shdr->RefPicList[l][vi->refIdx[l]]);
383
384
483k
      logtrace(LogMotion, "refIdx: %d -> dpb[%d]\n", vi->refIdx[l], shdr->RefPicList[l][vi->refIdx[l]]);
385
386
483k
      if (!refPic || refPic->PicState == UnusedForReference) {
387
0
        img->integrity = INTEGRITY_DECODING_ERRORS;
388
0
        ctx->add_warning(DE265_WARNING_NONEXISTING_REFERENCE_PICTURE_ACCESSED, false);
389
0
        fill_pred_samples(l);
390
0
      }
391
483k
      else if (refPic->get_width(0) != sps->pic_width_in_luma_samples ||
392
483k
               refPic->get_height(0) != sps->pic_height_in_luma_samples ||
393
483k
               img->get_chroma_format() != refPic->get_chroma_format()) {
394
0
        img->integrity = INTEGRITY_DECODING_ERRORS;
395
0
        ctx->add_warning(DE265_WARNING_REFERENCE_IMAGE_SIZE_DOES_NOT_MATCH_SPS, false);
396
0
        fill_pred_samples(l);
397
0
      }
398
483k
      else if (img->get_bit_depth(0) != refPic->get_bit_depth(0) ||
399
483k
               img->get_bit_depth(1) != refPic->get_bit_depth(1)) {
400
0
        img->integrity = INTEGRITY_DECODING_ERRORS;
401
0
        ctx->add_warning(DE265_WARNING_REFERENCE_IMAGE_BIT_DEPTH_DOES_NOT_MATCH, false);
402
0
        fill_pred_samples(l);
403
0
      }
404
483k
      else if (img->get_chroma_format() != refPic->get_chroma_format()) {
405
0
        img->integrity = INTEGRITY_DECODING_ERRORS;
406
0
        ctx->add_warning(DE265_WARNING_REFERENCE_IMAGE_CHROMA_FORMAT_DOES_NOT_MATCH, false);
407
0
        fill_pred_samples(l);
408
0
      }
409
483k
      else {
410
        // 8.5.3.2.2
411
412
483k
        logtrace(LogMotion,"do MC: L%d,MV=%d;%d RefPOC=%d\n",
413
483k
                 l,vi->mv[l].x,vi->mv[l].y,refPic->PicOrderCntVal);
414
415
416
        // TODO: must predSamples stride really be nCS or can it be something smaller like nPbW?
417
418
483k
        if (img->high_bit_depth(0)) {
419
137k
          mc_luma(ctx, sps, vi->mv[l].x, vi->mv[l].y, xP,yP,
420
137k
                  predSamplesL[l],nCS,
421
137k
                  (const uint16_t*)refPic->get_image_plane(0),
422
137k
                  refPic->get_luma_stride(), nPbW,nPbH, bit_depth_L);
423
137k
        }
424
346k
        else {
425
346k
          mc_luma(ctx, sps, vi->mv[l].x, vi->mv[l].y, xP,yP,
426
346k
                  predSamplesL[l],nCS,
427
346k
                  (const uint8_t*)refPic->get_image_plane(0),
428
346k
                  refPic->get_luma_stride(), nPbW,nPbH, bit_depth_L);
429
346k
        }
430
431
483k
        if (img->get_chroma_format() != de265_chroma_mono) {
432
201k
          if (img->high_bit_depth(1)) {
433
54.1k
            mc_chroma(ctx, sps, vi->mv[l].x, vi->mv[l].y, xP, yP,
434
54.1k
                      predSamplesC[0][l], nCS, (const uint16_t*) refPic->get_image_plane(1),
435
54.1k
                      refPic->get_chroma_stride(), nPbW / SubWidthC, nPbH / SubHeightC, bit_depth_C);
436
54.1k
            mc_chroma(ctx, sps, vi->mv[l].x, vi->mv[l].y, xP, yP,
437
54.1k
                      predSamplesC[1][l], nCS, (const uint16_t*) refPic->get_image_plane(2),
438
54.1k
                      refPic->get_chroma_stride(), nPbW / SubWidthC, nPbH / SubHeightC, bit_depth_C);
439
54.1k
          }
440
147k
          else {
441
147k
            mc_chroma(ctx, sps, vi->mv[l].x, vi->mv[l].y, xP, yP,
442
147k
                      predSamplesC[0][l], nCS, (const uint8_t*) refPic->get_image_plane(1),
443
147k
                      refPic->get_chroma_stride(), nPbW / SubWidthC, nPbH / SubHeightC, bit_depth_C);
444
147k
            mc_chroma(ctx, sps, vi->mv[l].x, vi->mv[l].y, xP, yP,
445
147k
                      predSamplesC[1][l], nCS, (const uint8_t*) refPic->get_image_plane(2),
446
147k
                      refPic->get_chroma_stride(), nPbW / SubWidthC, nPbH / SubHeightC, bit_depth_C);
447
147k
          }
448
201k
        }
449
483k
      }
450
483k
    }
451
854k
  }
452
453
454
  // weighted sample prediction  (8.5.3.2.3)
455
456
427k
  const int shift1_L = libde265_max(2,14-sps->BitDepth_Y);
457
427k
  const int offset_shift1_L = img->get_sps().WpOffsetBdShiftY;
458
427k
  const int shift1_C = libde265_max(2,14-sps->BitDepth_C);
459
427k
  const int offset_shift1_C = img->get_sps().WpOffsetBdShiftC;
460
461
  /*
462
  const int shift1_L = 14-img->sps.BitDepth_Y;
463
  const int offset_shift1_L = img->sps.BitDepth_Y-8;
464
  const int shift1_C = 14-img->sps.BitDepth_C;
465
  const int offset_shift1_C = img->sps.BitDepth_C-8;
466
  */
467
468
  /*
469
  if (0)
470
  printf("%d/%d %d/%d %d/%d %d/%d\n",
471
         shift1_L,
472
         Nshift1_L,
473
         offset_shift1_L,
474
         Noffset_shift1_L,
475
         shift1_C,
476
         Nshift1_C,
477
         offset_shift1_C,
478
         Noffset_shift1_C);
479
480
  assert(shift1_L==
481
         Nshift1_L);
482
  assert(offset_shift1_L==
483
         Noffset_shift1_L);
484
  assert(shift1_C==
485
         Nshift1_C);
486
  assert(offset_shift1_C==
487
         Noffset_shift1_C);
488
  */
489
490
491
427k
  logtrace(LogMotion,"predFlags (modified): %d %d\n", predFlag[0], predFlag[1]);
492
493
427k
  if (shdr->slice_type == SLICE_TYPE_P) {
494
75.6k
    if (pps->weighted_pred_flag==0) {
495
37.5k
      if (predFlag[0]==1 && predFlag[1]==0) {
496
37.5k
        ctx->acceleration.put_unweighted_pred(pixels[0], stride[0],
497
37.5k
                                              predSamplesL[0],nCS, nPbW,nPbH, bit_depth_L);
498
499
37.5k
        if (img->get_chroma_format() != de265_chroma_mono) {
500
9.43k
          ctx->acceleration.put_unweighted_pred(pixels[1], stride[1],
501
9.43k
                                                predSamplesC[0][0], nCS,
502
9.43k
                                                nPbW / SubWidthC, nPbH / SubHeightC, bit_depth_C);
503
9.43k
          ctx->acceleration.put_unweighted_pred(pixels[2], stride[2],
504
9.43k
                                                predSamplesC[1][0], nCS,
505
9.43k
                                                nPbW / SubWidthC, nPbH / SubHeightC, bit_depth_C);
506
9.43k
        }
507
37.5k
      }
508
0
      else {
509
0
        ctx->add_warning(DE265_WARNING_BOTH_PREDFLAGS_ZERO, false);
510
0
        img->integrity = INTEGRITY_DECODING_ERRORS;
511
0
      }
512
37.5k
    }
513
38.1k
    else {
514
      // weighted prediction
515
516
38.1k
      if (predFlag[0]==1 && predFlag[1]==0) {
517
518
38.1k
        int refIdx0 = vi->refIdx[0];
519
520
38.1k
        int luma_log2WD   = shdr->luma_log2_weight_denom + shift1_L;
521
38.1k
        int chroma_log2WD = shdr->ChromaLog2WeightDenom  + shift1_C;
522
523
38.1k
        int luma_w0 = shdr->LumaWeight[0][refIdx0];
524
38.1k
        int luma_o0 = shdr->luma_offset[0][refIdx0] * (1<<(offset_shift1_L));
525
526
38.1k
        int chroma0_w0 = shdr->ChromaWeight[0][refIdx0][0];
527
38.1k
        int chroma0_o0 = shdr->ChromaOffset[0][refIdx0][0] * (1<<(offset_shift1_C));
528
38.1k
        int chroma1_w0 = shdr->ChromaWeight[0][refIdx0][1];
529
38.1k
        int chroma1_o0 = shdr->ChromaOffset[0][refIdx0][1] * (1<<(offset_shift1_C));
530
531
38.1k
        logtrace(LogMotion,"weighted-0 [%d] %d %d %d  %dx%d\n", refIdx0, luma_log2WD-6,luma_w0,luma_o0,nPbW,nPbH);
532
533
38.1k
        ctx->acceleration.put_weighted_pred(pixels[0], stride[0],
534
38.1k
                                            predSamplesL[0],nCS, nPbW,nPbH,
535
38.1k
                                            luma_w0, luma_o0, luma_log2WD, bit_depth_L);
536
38.1k
        if (img->get_chroma_format() != de265_chroma_mono) {
537
19.3k
          ctx->acceleration.put_weighted_pred(pixels[1], stride[1],
538
19.3k
                                              predSamplesC[0][0], nCS, nPbW / SubWidthC, nPbH / SubHeightC,
539
19.3k
                                              chroma0_w0, chroma0_o0, chroma_log2WD, bit_depth_C);
540
19.3k
          ctx->acceleration.put_weighted_pred(pixels[2], stride[2],
541
19.3k
                                              predSamplesC[1][0], nCS, nPbW / SubWidthC, nPbH / SubHeightC,
542
19.3k
                                              chroma1_w0, chroma1_o0, chroma_log2WD, bit_depth_C);
543
19.3k
        }
544
38.1k
      }
545
0
      else {
546
0
        ctx->add_warning(DE265_WARNING_BOTH_PREDFLAGS_ZERO, false);
547
0
        img->integrity = INTEGRITY_DECODING_ERRORS;
548
0
      }
549
38.1k
    }
550
75.6k
  }
551
351k
  else {
552
351k
    assert(shdr->slice_type == SLICE_TYPE_B);
553
554
351k
    if (predFlag[0]==1 && predFlag[1]==1) {
555
56.2k
      if (pps->weighted_bipred_flag==0) {
556
        //const int shift2  = 15-8; // TODO: real bit depth
557
        //const int offset2 = 1<<(shift2-1);
558
559
43.7k
        int16_t* in0 = predSamplesL[0];
560
43.7k
        int16_t* in1 = predSamplesL[1];
561
562
43.7k
        ctx->acceleration.put_weighted_pred_avg(pixels[0], stride[0],
563
43.7k
                                                in0,in1, nCS, nPbW, nPbH, bit_depth_L);
564
565
43.7k
        int16_t* in00 = predSamplesC[0][0];
566
43.7k
        int16_t* in01 = predSamplesC[0][1];
567
43.7k
        int16_t* in10 = predSamplesC[1][0];
568
43.7k
        int16_t* in11 = predSamplesC[1][1];
569
570
43.7k
        if (img->get_chroma_format() != de265_chroma_mono) {
571
17.4k
          ctx->acceleration.put_weighted_pred_avg(pixels[1], stride[1],
572
17.4k
                                                  in00, in01, nCS,
573
17.4k
                                                  nPbW / SubWidthC, nPbH / SubHeightC, bit_depth_C);
574
17.4k
          ctx->acceleration.put_weighted_pred_avg(pixels[2], stride[2],
575
17.4k
                                                  in10, in11, nCS,
576
17.4k
                                                  nPbW / SubWidthC, nPbH / SubHeightC, bit_depth_C);
577
17.4k
        }
578
43.7k
      }
579
12.5k
      else {
580
        // weighted prediction
581
582
12.5k
        int refIdx0 = vi->refIdx[0];
583
12.5k
        int refIdx1 = vi->refIdx[1];
584
585
12.5k
        int luma_log2WD   = shdr->luma_log2_weight_denom + shift1_L;
586
12.5k
        int chroma_log2WD = shdr->ChromaLog2WeightDenom + shift1_C;
587
588
12.5k
        int luma_w0 = shdr->LumaWeight[0][refIdx0];
589
12.5k
        int luma_o0 = shdr->luma_offset[0][refIdx0] * (1<<(offset_shift1_L));
590
12.5k
        int luma_w1 = shdr->LumaWeight[1][refIdx1];
591
12.5k
        int luma_o1 = shdr->luma_offset[1][refIdx1] * (1<<(offset_shift1_L));
592
593
12.5k
        int chroma0_w0 = shdr->ChromaWeight[0][refIdx0][0];
594
12.5k
        int chroma0_o0 = shdr->ChromaOffset[0][refIdx0][0] * (1<<(offset_shift1_C));
595
12.5k
        int chroma1_w0 = shdr->ChromaWeight[0][refIdx0][1];
596
12.5k
        int chroma1_o0 = shdr->ChromaOffset[0][refIdx0][1] * (1<<(offset_shift1_C));
597
12.5k
        int chroma0_w1 = shdr->ChromaWeight[1][refIdx1][0];
598
12.5k
        int chroma0_o1 = shdr->ChromaOffset[1][refIdx1][0] * (1<<(offset_shift1_C));
599
12.5k
        int chroma1_w1 = shdr->ChromaWeight[1][refIdx1][1];
600
12.5k
        int chroma1_o1 = shdr->ChromaOffset[1][refIdx1][1] * (1<<(offset_shift1_C));
601
602
12.5k
        logtrace(LogMotion,"weighted-BI-0 [%d] %d %d %d  %dx%d\n", refIdx0, luma_log2WD-6,luma_w0,luma_o0,nPbW,nPbH);
603
12.5k
        logtrace(LogMotion,"weighted-BI-1 [%d] %d %d %d  %dx%d\n", refIdx1, luma_log2WD-6,luma_w1,luma_o1,nPbW,nPbH);
604
605
12.5k
        int16_t* in0 = predSamplesL[0];
606
12.5k
        int16_t* in1 = predSamplesL[1];
607
608
12.5k
        ctx->acceleration.put_weighted_bipred(pixels[0], stride[0],
609
12.5k
                                              in0,in1, nCS, nPbW, nPbH,
610
12.5k
                                              luma_w0,luma_o0,
611
12.5k
                                              luma_w1,luma_o1,
612
12.5k
                                              luma_log2WD, bit_depth_L);
613
614
12.5k
        int16_t* in00 = predSamplesC[0][0];
615
12.5k
        int16_t* in01 = predSamplesC[0][1];
616
12.5k
        int16_t* in10 = predSamplesC[1][0];
617
12.5k
        int16_t* in11 = predSamplesC[1][1];
618
619
12.5k
        if (img->get_chroma_format() != de265_chroma_mono) {
620
7.13k
          ctx->acceleration.put_weighted_bipred(pixels[1], stride[1],
621
7.13k
                                                in00, in01, nCS, nPbW / SubWidthC, nPbH / SubHeightC,
622
7.13k
                                                chroma0_w0, chroma0_o0,
623
7.13k
                                                chroma0_w1, chroma0_o1,
624
7.13k
                                                chroma_log2WD, bit_depth_C);
625
7.13k
          ctx->acceleration.put_weighted_bipred(pixels[2], stride[2],
626
7.13k
                                                in10, in11, nCS, nPbW / SubWidthC, nPbH / SubHeightC,
627
7.13k
                                                chroma1_w0, chroma1_o0,
628
7.13k
                                                chroma1_w1, chroma1_o1,
629
7.13k
                                                chroma_log2WD, bit_depth_C);
630
7.13k
        }
631
12.5k
      }
632
56.2k
    }
633
295k
    else if (predFlag[0]==1 || predFlag[1]==1) {
634
295k
      int l = predFlag[0] ? 0 : 1;
635
636
295k
      if (pps->weighted_bipred_flag==0) {
637
265k
        ctx->acceleration.put_unweighted_pred(pixels[0], stride[0],
638
265k
                                              predSamplesL[l],nCS, nPbW,nPbH, bit_depth_L);
639
640
265k
        if (img->get_chroma_format() != de265_chroma_mono) {
641
110k
          ctx->acceleration.put_unweighted_pred(pixels[1], stride[1],
642
110k
                                                predSamplesC[0][l], nCS,
643
110k
                                                nPbW / SubWidthC, nPbH / SubHeightC, bit_depth_C);
644
110k
          ctx->acceleration.put_unweighted_pred(pixels[2], stride[2],
645
110k
                                                predSamplesC[1][l], nCS,
646
110k
                                                nPbW / SubWidthC, nPbH / SubHeightC, bit_depth_C);
647
110k
        }
648
265k
      }
649
29.5k
      else {
650
29.5k
        int refIdx = vi->refIdx[l];
651
652
29.5k
        int luma_log2WD   = shdr->luma_log2_weight_denom + shift1_L;
653
29.5k
        int chroma_log2WD = shdr->ChromaLog2WeightDenom  + shift1_C;
654
655
29.5k
        int luma_w = shdr->LumaWeight[l][refIdx];
656
29.5k
        int luma_o = shdr->luma_offset[l][refIdx] * (1<<(offset_shift1_L));
657
658
29.5k
        int chroma0_w = shdr->ChromaWeight[l][refIdx][0];
659
29.5k
        int chroma0_o = shdr->ChromaOffset[l][refIdx][0] * (1<<(offset_shift1_C));
660
29.5k
        int chroma1_w = shdr->ChromaWeight[l][refIdx][1];
661
29.5k
        int chroma1_o = shdr->ChromaOffset[l][refIdx][1] * (1<<(offset_shift1_C));
662
663
29.5k
        logtrace(LogMotion,"weighted-B-L%d [%d] %d %d %d  %dx%d\n", l, refIdx, luma_log2WD-6,luma_w,luma_o,nPbW,nPbH);
664
665
29.5k
        ctx->acceleration.put_weighted_pred(pixels[0], stride[0],
666
29.5k
                                            predSamplesL[l],nCS, nPbW,nPbH,
667
29.5k
                                            luma_w, luma_o, luma_log2WD, bit_depth_L);
668
669
29.5k
        if (img->get_chroma_format() != de265_chroma_mono) {
670
12.5k
          ctx->acceleration.put_weighted_pred(pixels[1], stride[1],
671
12.5k
                                              predSamplesC[0][l], nCS,
672
12.5k
                                              nPbW / SubWidthC, nPbH / SubHeightC,
673
12.5k
                                              chroma0_w, chroma0_o, chroma_log2WD, bit_depth_C);
674
12.5k
          ctx->acceleration.put_weighted_pred(pixels[2], stride[2],
675
12.5k
                                              predSamplesC[1][l], nCS,
676
12.5k
                                              nPbW / SubWidthC, nPbH / SubHeightC,
677
12.5k
                                              chroma1_w, chroma1_o, chroma_log2WD, bit_depth_C);
678
12.5k
        }
679
29.5k
      }
680
295k
    }
681
0
    else {
682
      // TODO: check why it can actually happen that both predFlags[] are false.
683
      // For now, we ignore this and continue decoding.
684
685
0
      ctx->add_warning(DE265_WARNING_BOTH_PREDFLAGS_ZERO, false);
686
0
      img->integrity = INTEGRITY_DECODING_ERRORS;
687
0
    }
688
351k
  }
689
690
#if defined(DE265_LOG_TRACE) && 0
691
  logtrace(LogTransform,"MC pixels (luma), position %d %d:\n", xP,yP);
692
693
  for (int y=0;y<nPbH;y++) {
694
    logtrace(LogTransform,"MC-y-%d-%d ",xP,yP+y);
695
696
    for (int x=0;x<nPbW;x++) {
697
      logtrace(LogTransform,"*%02x ", pixels[0][x+y*stride[0]]);
698
    }
699
700
    logtrace(LogTransform,"*\n");
701
  }
702
703
704
  logtrace(LogTransform,"MC pixels (chroma cb), position %d %d:\n", xP/2,yP/2);
705
706
  for (int y=0;y<nPbH/2;y++) {
707
    logtrace(LogTransform,"MC-cb-%d-%d ",xP/2,yP/2+y);
708
709
    for (int x=0;x<nPbW/2;x++) {
710
      logtrace(LogTransform,"*%02x ", pixels[1][x+y*stride[1]]);
711
    }
712
713
    logtrace(LogTransform,"*\n");
714
  }
715
716
717
  logtrace(LogTransform,"MC pixels (chroma cr), position %d %d:\n", xP/2,yP/2);
718
719
  for (int y=0;y<nPbH/2;y++) {
720
    logtrace(LogTransform,"MC-cr-%d-%d ",xP/2,yP/2+y);
721
722
    for (int x=0;x<nPbW/2;x++) {
723
      logtrace(LogTransform,"*%02x ", pixels[2][x+y*stride[2]]);
724
    }
725
726
    logtrace(LogTransform,"*\n");
727
  }
728
#endif
729
427k
}
730
731
732
#ifdef DE265_LOG_TRACE
733
void logmvcand(const PBMotion& p)
734
{
735
  for (int v=0;v<2;v++) {
736
    if (p.predFlag[v]) {
737
      logtrace(LogMotion,"  %d: %s  %d;%d ref=%d\n", v, p.predFlag[v] ? "yes":"no ",
738
               p.mv[v].x,p.mv[v].y, p.refIdx[v]);
739
    } else {
740
      logtrace(LogMotion,"  %d: %s  --;-- ref=--\n", v, p.predFlag[v] ? "yes":"no ");
741
    }
742
  }
743
}
744
#else
745
#define logmvcand(p)
746
#endif
747
748
749
bool PBMotion::operator==(const PBMotion& b) const
750
122k
{
751
122k
  const PBMotion& a = *this;
752
753
  // TODO: is this really correct? no check for predFlag? Standard says so... (p.127)
754
755
259k
  for (int i=0;i<2;i++) {
756
197k
    if (a.predFlag[i] != b.predFlag[i]) return false;
757
758
181k
    if (a.predFlag[i]) {
759
141k
      if (a.mv[i].x != b.mv[i].x) return false;
760
112k
      if (a.mv[i].y != b.mv[i].y) return false;
761
103k
      if (a.refIdx[i] != b.refIdx[i]) return false;
762
103k
    }
763
181k
  }
764
765
62.2k
  return true;
766
122k
}
767
768
769
class MotionVectorAccess_de265_image : public MotionVectorAccess
770
{
771
public:
772
270k
  MotionVectorAccess_de265_image(const de265_image* i) : img(i) { }
773
774
270k
  enum PartMode get_PartMode(int x,int y) const override { return img->get_PartMode(x,y); }
775
221k
  const PBMotion& get_mv_info(int x,int y) const override { return img->get_mv_info(x,y); }
776
777
private:
778
  const de265_image* img;
779
};
780
781
782
783
/*
784
  +--+                +--+--+
785
  |B2|                |B1|B0|
786
  +--+----------------+--+--+
787
     |                   |
788
     |                   |
789
     |                   |
790
     |                   |
791
     |        PB         |
792
     |                   |
793
     |                   |
794
  +--+                   |
795
  |A1|                   |
796
  +--+-------------------+
797
  |A0|
798
  +--+
799
*/
800
801
802
// 8.5.3.1.2
803
// TODO: check whether the candidate list can be filled directly in this function, avoiding the later copy
804
/*
805
  xC/yC:  CB position
806
  nCS:    CB size                 (probably modified because of singleMCLFlag)
807
  xP/yP:  PB position (absolute)  (probably modified because of singleMCLFlag)
808
  singleMCLFlag
809
  nPbW/nPbH: PB size
810
  partIdx
811
  out_cand: merging candidate vectors
812
813
  Add these candidates:
814
  - A1
815
  - B1  (if != A1)
816
  - B0  (if != B1)
817
  - A0  (if != A1)
818
  - B2  (if != A1 and != B1)
819
820
  A maximum of 4 candidates are generated.
821
822
  Note 1: For a CB split into two PBs, it does not make sense to merge the
823
  second part to the parameters of the first part, since then, we could use 2Nx2N
824
  right away. -> Exclude this candidate.
825
*/
826
int derive_spatial_merging_candidates(//const de265_image* img,
827
                                      const MotionVectorAccess& mvaccess,
828
                                      const de265_image* img,
829
                                      int xC, int yC, int nCS, int xP, int yP,
830
                                      uint8_t singleMCLFlag,
831
                                      int nPbW, int nPbH,
832
                                      int partIdx,
833
                                      PBMotion* out_cand,
834
                                      int maxCandidates)
835
270k
{
836
270k
  const pic_parameter_set* pps = &img->get_pps();
837
270k
  const int log2_parallel_merge_level = pps->log2_parallel_merge_level;
838
839
270k
  enum PartMode PartMode = mvaccess.get_PartMode(xC,yC);
840
841
  /*
842
  const int A0 = SpatialMergingCandidates::PRED_A0;
843
  const int A1 = SpatialMergingCandidates::PRED_A1;
844
  const int B0 = SpatialMergingCandidates::PRED_B0;
845
  const int B1 = SpatialMergingCandidates::PRED_B1;
846
  const int B2 = SpatialMergingCandidates::PRED_B2;
847
  */
848
849
  // --- A1 ---
850
851
  // a pixel within A1 (bottom right of A1)
852
270k
  int xA1 = xP-1;
853
270k
  int yA1 = yP+nPbH-1;
854
855
270k
  bool availableA1;
856
270k
  int idxA1;
857
858
270k
  int computed_candidates = 0;
859
860
  // check if candidate is in same motion-estimation region (MER) -> discard
861
270k
  if ((xP>>log2_parallel_merge_level) == (xA1>>log2_parallel_merge_level) &&
862
10.5k
      (yP>>log2_parallel_merge_level) == (yA1>>log2_parallel_merge_level)) {
863
9.85k
    availableA1 = false;
864
9.85k
    logtrace(LogMotion,"spatial merging candidate A1: below parallel merge level\n");
865
9.85k
  }
866
  // redundant candidate? (Note 1) -> discard
867
260k
  else if (// !singleMCLFlag &&    automatically true when partIdx==1
868
260k
           partIdx==1 &&
869
38.9k
           (PartMode==PART_Nx2N ||
870
19.0k
            PartMode==PART_nLx2N ||
871
22.0k
            PartMode==PART_nRx2N)) {
872
22.0k
    availableA1 = false;
873
22.0k
    logtrace(LogMotion,"spatial merging candidate A1: second part ignore\n");
874
22.0k
  }
875
  // MV available in A1
876
238k
  else {
877
238k
    availableA1 = img->available_pred_blk(xC,yC, nCS, xP,yP, nPbW,nPbH,partIdx, xA1,yA1);
878
238k
    if (!availableA1) logtrace(LogMotion,"spatial merging candidate A1: unavailable\n");
879
238k
  }
880
881
270k
  if (availableA1) {
882
221k
    idxA1 = computed_candidates++;
883
221k
    out_cand[idxA1] = mvaccess.get_mv_info(xA1,yA1);
884
885
221k
    logtrace(LogMotion,"spatial merging candidate A1:\n");
886
221k
    logmvcand(out_cand[idxA1]);
887
221k
  }
888
889
270k
  if (computed_candidates>=maxCandidates) return computed_candidates;
890
891
892
  // --- B1 ---
893
894
113k
  int xB1 = xP+nPbW-1;
895
113k
  int yB1 = yP-1;
896
897
113k
  bool availableB1;
898
113k
  int idxB1;
899
900
  // same MER -> discard
901
113k
  if ((xP>>log2_parallel_merge_level) == (xB1>>log2_parallel_merge_level) &&
902
56.3k
      (yP>>log2_parallel_merge_level) == (yB1>>log2_parallel_merge_level)) {
903
7.31k
    availableB1 = false;
904
7.31k
    logtrace(LogMotion,"spatial merging candidate B1: below parallel merge level\n");
905
7.31k
  }
906
  // redundant candidate (Note 1) -> discard
907
105k
  else if (// !singleMCLFlag &&    automatically true when partIdx==1
908
105k
           partIdx==1 &&
909
30.8k
           (PartMode==PART_2NxN ||
910
23.1k
            PartMode==PART_2NxnU ||
911
22.9k
            PartMode==PART_2NxnD)) {
912
8.03k
    availableB1 = false;
913
8.03k
    logtrace(LogMotion,"spatial merging candidate B1: second part ignore\n");
914
8.03k
  }
915
  // MV available in B1
916
97.7k
  else {
917
97.7k
    availableB1 = img->available_pred_blk(xC,yC, nCS, xP,yP, nPbW,nPbH,partIdx, xB1,yB1);
918
97.7k
    if (!availableB1) logtrace(LogMotion,"spatial merging candidate B1: unavailable\n");
919
97.7k
  }
920
921
113k
  if (availableB1) {
922
79.2k
    const PBMotion& b1 = img->get_mv_info(xB1,yB1);
923
924
    // B1 == A1 -> discard B1
925
79.2k
    if (availableA1 && out_cand[idxA1] == b1) {
926
22.9k
      idxB1 = idxA1;
927
22.9k
      logtrace(LogMotion,"spatial merging candidate B1: redundant to A1\n");
928
22.9k
    }
929
56.3k
    else {
930
56.3k
      idxB1 = computed_candidates++;
931
56.3k
      out_cand[idxB1] = b1;
932
933
56.3k
      logtrace(LogMotion,"spatial merging candidate B1:\n");
934
56.3k
      logmvcand(out_cand[idxB1]);
935
56.3k
    }
936
79.2k
  }
937
938
113k
  if (computed_candidates>=maxCandidates) return computed_candidates;
939
940
941
  // --- B0 ---
942
943
71.6k
  int xB0 = xP+nPbW;
944
71.6k
  int yB0 = yP-1;
945
946
71.6k
  bool availableB0;
947
71.6k
  int idxB0;
948
949
71.6k
  if ((xP>>log2_parallel_merge_level) == (xB0>>log2_parallel_merge_level) &&
950
6.65k
      (yP>>log2_parallel_merge_level) == (yB0>>log2_parallel_merge_level)) {
951
5.60k
    availableB0 = false;
952
5.60k
    logtrace(LogMotion,"spatial merging candidate B0: below parallel merge level\n");
953
5.60k
  }
954
66.0k
  else {
955
66.0k
    availableB0 = img->available_pred_blk(xC,yC, nCS, xP,yP, nPbW,nPbH,partIdx, xB0,yB0);
956
66.0k
    if (!availableB0) logtrace(LogMotion,"spatial merging candidate B0: unavailable\n");
957
66.0k
  }
958
959
71.6k
  if (availableB0) {
960
30.5k
    const PBMotion& b0 = img->get_mv_info(xB0,yB0);
961
962
    // B0 == B1 -> discard B0
963
30.5k
    if (availableB1 && out_cand[idxB1]==b0) {
964
13.0k
      idxB0 = idxB1;
965
13.0k
      logtrace(LogMotion,"spatial merging candidate B0: redundant to B1\n");
966
13.0k
    }
967
17.4k
    else {
968
17.4k
      idxB0 = computed_candidates++;
969
17.4k
      out_cand[idxB0] = b0;
970
17.4k
      logtrace(LogMotion,"spatial merging candidate B0:\n");
971
17.4k
      logmvcand(out_cand[idxB0]);
972
17.4k
    }
973
30.5k
  }
974
975
71.6k
  if (computed_candidates>=maxCandidates) return computed_candidates;
976
977
978
  // --- A0 ---
979
980
58.0k
  int xA0 = xP-1;
981
58.0k
  int yA0 = yP+nPbH;
982
983
58.0k
  bool availableA0;
984
58.0k
  int idxA0;
985
986
58.0k
  if ((xP>>log2_parallel_merge_level) == (xA0>>log2_parallel_merge_level) &&
987
7.68k
      (yP>>log2_parallel_merge_level) == (yA0>>log2_parallel_merge_level)) {
988
5.01k
    availableA0 = false;
989
5.01k
    logtrace(LogMotion,"spatial merging candidate A0: below parallel merge level\n");
990
5.01k
  }
991
53.0k
  else {
992
53.0k
    availableA0 = img->available_pred_blk(xC,yC, nCS, xP,yP, nPbW,nPbH,partIdx, xA0,yA0);
993
53.0k
    if (!availableA0) logtrace(LogMotion,"spatial merging candidate A0: unavailable\n");
994
53.0k
  }
995
996
58.0k
  if (availableA0) {
997
9.59k
    const PBMotion& a0 = img->get_mv_info(xA0,yA0);
998
999
    // A0 == A1 -> discard A0
1000
9.59k
    if (availableA1 && out_cand[idxA1]==a0) {
1001
6.45k
      idxA0 = idxA1;
1002
6.45k
      logtrace(LogMotion,"spatial merging candidate A0: redundant to A1\n");
1003
6.45k
    }
1004
3.13k
    else {
1005
3.13k
      idxA0 = computed_candidates++;
1006
3.13k
      out_cand[idxA0] = a0;
1007
3.13k
      logtrace(LogMotion,"spatial merging candidate A0:\n");
1008
3.13k
      logmvcand(out_cand[idxA0]);
1009
3.13k
    }
1010
9.59k
  }
1011
1012
58.0k
  if (computed_candidates>=maxCandidates) return computed_candidates;
1013
1014
1015
  // --- B2 ---
1016
1017
55.7k
  int xB2 = xP-1;
1018
55.7k
  int yB2 = yP-1;
1019
1020
55.7k
  bool availableB2;
1021
55.7k
  int idxB2;
1022
1023
  // if we already have four candidates, do not consider B2 anymore
1024
55.7k
  if (computed_candidates==4) {
1025
59
    availableB2 = false;
1026
59
    logtrace(LogMotion,"spatial merging candidate B2: ignore\n");
1027
59
  }
1028
55.7k
  else if ((xP>>log2_parallel_merge_level) == (xB2>>log2_parallel_merge_level) &&
1029
7.68k
           (yP>>log2_parallel_merge_level) == (yB2>>log2_parallel_merge_level)) {
1030
6.71k
    availableB2 = false;
1031
6.71k
    logtrace(LogMotion,"spatial merging candidate B2: below parallel merge level\n");
1032
6.71k
  }
1033
48.9k
  else {
1034
48.9k
    availableB2 = img->available_pred_blk(xC,yC, nCS, xP,yP, nPbW,nPbH,partIdx, xB2,yB2);
1035
48.9k
    if (!availableB2) logtrace(LogMotion,"spatial merging candidate B2: unavailable\n");
1036
48.9k
  }
1037
1038
55.7k
  if (availableB2) {
1039
29.0k
    const PBMotion& b2 = img->get_mv_info(xB2,yB2);
1040
1041
    // B2 == B1 -> discard B2
1042
29.0k
    if (availableB1 && out_cand[idxB1]==b2) {
1043
14.1k
      idxB2 = idxB1;
1044
14.1k
      logtrace(LogMotion,"spatial merging candidate B2: redundant to B1\n");
1045
14.1k
    }
1046
    // B2 == A1 -> discard B2
1047
14.9k
    else if (availableA1 && out_cand[idxA1]==b2) {
1048
5.66k
      idxB2 = idxA1;
1049
5.66k
      logtrace(LogMotion,"spatial merging candidate B2: redundant to A1\n");
1050
5.66k
    }
1051
9.25k
    else {
1052
9.25k
      idxB2 = computed_candidates++;
1053
9.25k
      out_cand[idxB2] = b2;
1054
9.25k
      logtrace(LogMotion,"spatial merging candidate B2:\n");
1055
9.25k
      logmvcand(out_cand[idxB2]);
1056
9.25k
    }
1057
29.0k
  }
1058
1059
55.7k
  return computed_candidates;
1060
58.0k
}
1061
1062
1063
// 8.5.3.1.4
1064
void derive_zero_motion_vector_candidates(const slice_segment_header* shdr,
1065
                                          PBMotion* out_mergeCandList,
1066
                                          int* inout_numCurrMergeCand,
1067
                                          int maxCandidates)
1068
270k
{
1069
270k
  logtrace(LogMotion,"derive_zero_motion_vector_candidates\n");
1070
1071
270k
  int numRefIdx;
1072
1073
270k
  if (shdr->slice_type==SLICE_TYPE_P) {
1074
56.1k
    numRefIdx = shdr->num_ref_idx_l0_active;
1075
56.1k
  }
1076
214k
  else {
1077
214k
    numRefIdx = libde265_min(shdr->num_ref_idx_l0_active,
1078
214k
                             shdr->num_ref_idx_l1_active);
1079
214k
  }
1080
1081
1082
  //int numInputMergeCand = *inout_numMergeCand;
1083
270k
  int zeroIdx = 0;
1084
1085
339k
  while (*inout_numCurrMergeCand < maxCandidates) {
1086
    // 1.
1087
1088
68.9k
    logtrace(LogMotion,"zeroIdx:%d numRefIdx:%d\n", zeroIdx, numRefIdx);
1089
1090
68.9k
    PBMotion* newCand = &out_mergeCandList[*inout_numCurrMergeCand];
1091
1092
68.9k
    const int refIdx = (zeroIdx < numRefIdx) ? zeroIdx : 0;
1093
1094
68.9k
    if (shdr->slice_type==SLICE_TYPE_P) {
1095
29.2k
      newCand->refIdx[0] = refIdx;
1096
29.2k
      newCand->refIdx[1] = 0;
1097
29.2k
      newCand->predFlag[0] = 1;
1098
29.2k
      newCand->predFlag[1] = 0;
1099
29.2k
    }
1100
39.6k
    else {
1101
39.6k
      newCand->refIdx[0] = refIdx;
1102
39.6k
      newCand->refIdx[1] = refIdx;
1103
39.6k
      newCand->predFlag[0] = 1;
1104
39.6k
      newCand->predFlag[1] = 1;
1105
39.6k
    }
1106
1107
68.9k
    newCand->mv[0].x = 0;
1108
68.9k
    newCand->mv[0].y = 0;
1109
68.9k
    newCand->mv[1].x = 0;
1110
68.9k
    newCand->mv[1].y = 0;
1111
1112
68.9k
    (*inout_numCurrMergeCand)++;
1113
1114
    // 2.
1115
1116
68.9k
    zeroIdx++;
1117
68.9k
  }
1118
270k
}
1119
1120
1121
/* Scales motion vector 'mv' by the ratio of two POC distances.

   out_mv   - receives the scaled vector (or an unmodified copy of 'mv' on failure)
   mv       - vector to scale
   colDist  - POC distance the input vector spans (clipped to [-128,127])
   currDist - POC distance the output vector should span (clipped to [-128,127])

   Returns false when the clipped 'colDist' is zero, i.e. no scale factor can be
   computed; otherwise true.
 */
bool scale_mv(MotionVector* out_mv, MotionVector mv, int colDist, int currDist)
{
  const int td = Clip3(-128,127, colDist);
  const int tb = Clip3(-128,127, currDist);

  // cannot divide by a zero distance -> hand back the input unchanged
  if (td==0) {
    *out_mv = mv;
    return false;
  }

  const int tx = (16384 + (abs_value(td)>>1)) / td;
  const int distScaleFactor = Clip3(-4096,4095, (tb*tx+32)>>6);

  const int scaledX = distScaleFactor*mv.x;
  const int scaledY = distScaleFactor*mv.y;

  // round the magnitude and re-apply the sign, then clip to the 16 bit MV range
  out_mv->x = Clip3(-32768,32767, Sign(scaledX)*((abs_value(scaledX)+127)>>8));
  out_mv->y = Clip3(-32768,32767, Sign(scaledY)*((abs_value(scaledY)+127)>>8));

  return true;
}
1140
1141
1142
// (L1003) 8.5.3.2.8
1143
1144
void derive_collocated_motion_vectors(base_context* ctx,
1145
                                      de265_image* img,
1146
                                      const slice_segment_header* shdr,
1147
                                      int xP,int yP,
1148
                                      int colPic,
1149
                                      int xColPb,int yColPb,
1150
                                      int refIdxLX,  // (always 0 for merge mode)
1151
                                      int X,
1152
                                      MotionVector* out_mvLXCol,
1153
                                      uint8_t* out_availableFlagLXCol)
1154
39.1k
{
1155
39.1k
  logtrace(LogMotion,"derive_collocated_motion_vectors %d;%d\n",xP,yP);
1156
1157
1158
  // get collocated image and the prediction mode at the collocated position
1159
1160
39.1k
  assert(ctx->has_image(colPic));
1161
39.1k
  const de265_image* colImg = ctx->get_image(colPic);
1162
1163
  // check for access outside image area
1164
1165
39.1k
  if (xColPb >= colImg->get_width() ||
1166
39.1k
      yColPb >= colImg->get_height()) {
1167
0
    ctx->add_warning(DE265_WARNING_COLLOCATED_MOTION_VECTOR_OUTSIDE_IMAGE_AREA, false);
1168
0
    *out_availableFlagLXCol = 0;
1169
0
    return;
1170
0
  }
1171
1172
39.1k
  enum PredMode predMode = colImg->get_pred_mode(xColPb,yColPb);
1173
1174
1175
  // collocated block is Intra -> no collocated MV
1176
1177
39.1k
  if (predMode == MODE_INTRA) {
1178
36.3k
    out_mvLXCol->x = 0;
1179
36.3k
    out_mvLXCol->y = 0;
1180
36.3k
    *out_availableFlagLXCol = 0;
1181
36.3k
    return;
1182
36.3k
  }
1183
1184
1185
2.78k
  logtrace(LogMotion,"colPic:%d (POC=%d) X:%d refIdxLX:%d refpiclist:%d\n",
1186
2.78k
           colPic,
1187
2.78k
           colImg->PicOrderCntVal,
1188
2.78k
           X,refIdxLX,shdr->RefPicList[X][refIdxLX]);
1189
1190
1191
  // collocated reference image is unavailable -> no collocated MV
1192
1193
2.78k
  if (colImg->integrity == INTEGRITY_UNAVAILABLE_REFERENCE) {
1194
0
    out_mvLXCol->x = 0;
1195
0
    out_mvLXCol->y = 0;
1196
0
    *out_availableFlagLXCol = 0;
1197
0
    return;
1198
0
  }
1199
1200
1201
  // get the collocated MV
1202
1203
2.78k
  const PBMotion& mvi = colImg->get_mv_info(xColPb,yColPb);
1204
2.78k
  int listCol;
1205
2.78k
  int refIdxCol;
1206
2.78k
  MotionVector mvCol;
1207
1208
2.78k
  logtrace(LogMotion,"read MVI %d;%d:\n",xColPb,yColPb);
1209
2.78k
  logmvcand(mvi);
1210
1211
1212
  // collocated MV uses only L1 -> use L1
1213
2.78k
  if (mvi.predFlag[0]==0) {
1214
1.12k
    mvCol = mvi.mv[1];
1215
1.12k
    refIdxCol = mvi.refIdx[1];
1216
1.12k
    listCol = 1;
1217
1.12k
  }
1218
  // collocated MV uses only L0 -> use L0
1219
1.66k
  else if (mvi.predFlag[1]==0) {
1220
816
    mvCol = mvi.mv[0];
1221
816
    refIdxCol = mvi.refIdx[0];
1222
816
    listCol = 0;
1223
816
  }
1224
  // collocated MV uses L0 and L1
1225
845
  else {
1226
845
    bool allRefFramesBeforeCurrentFrame = true;
1227
1228
845
    const int currentPOC = img->PicOrderCntVal;
1229
1230
    // all reference POCs earlier than current POC (list 1)
1231
    // Test L1 first, because there is a higher change to find a future reference frame.
1232
1233
1.78k
    for (int rIdx=0; rIdx<shdr->num_ref_idx_l1_active && allRefFramesBeforeCurrentFrame; rIdx++)
1234
936
      {
1235
936
        const de265_image* refimg = ctx->get_image(shdr->RefPicList[1][rIdx]);
1236
936
        int refPOC = refimg->PicOrderCntVal;
1237
1238
936
        if (refPOC > currentPOC) {
1239
36
          allRefFramesBeforeCurrentFrame = false;
1240
36
        }
1241
936
      }
1242
1243
    // all reference POCs earlier than current POC (list 0)
1244
1245
1.77k
    for (int rIdx=0; rIdx<shdr->num_ref_idx_l0_active && allRefFramesBeforeCurrentFrame; rIdx++)
1246
933
      {
1247
933
        const de265_image* refimg = ctx->get_image(shdr->RefPicList[0][rIdx]);
1248
933
        int refPOC = refimg->PicOrderCntVal;
1249
1250
933
        if (refPOC > currentPOC) {
1251
23
          allRefFramesBeforeCurrentFrame = false;
1252
23
        }
1253
933
      }
1254
1255
1256
    /* TODO: What is the rationale behind this ???
1257
1258
       My guess:
1259
       when there are images before the current frame (most probably in L0) and images after
1260
       the current frame (most probably in L1), we take the reference in the opposite
1261
       direction than where the collocated frame is positioned in the hope that the distance
1262
       to the current frame will be smaller and thus give a better prediction.
1263
1264
       If all references point into the past, we cannot say much about the temporal order or
1265
       L0,L1 and thus take over both parts.
1266
     */
1267
1268
845
    if (allRefFramesBeforeCurrentFrame) {
1269
786
      mvCol = mvi.mv[X];
1270
786
      refIdxCol = mvi.refIdx[X];
1271
786
      listCol = X;
1272
786
    }
1273
59
    else {
1274
59
      int N = shdr->collocated_from_l0_flag;
1275
59
      mvCol = mvi.mv[N];
1276
59
      refIdxCol = mvi.refIdx[N];
1277
59
      listCol = N;
1278
59
    }
1279
845
  }
1280
1281
1282
1283
2.78k
  uint16_t slice_hdr_idx = colImg->get_SliceHeaderIndex(xColPb,yColPb);
1284
2.78k
  if (slice_hdr_idx >= colImg->slices.size()) {
1285
0
    ctx->add_warning(DE265_WARNING_INVALID_SLICE_HEADER_INDEX_ACCESS, false);
1286
1287
0
    *out_availableFlagLXCol = 0;
1288
0
    out_mvLXCol->x = 0;
1289
0
    out_mvLXCol->y = 0;
1290
0
    return;
1291
0
  }
1292
1293
2.78k
  const slice_segment_header* colShdr = colImg->slices[ colImg->get_SliceHeaderIndex(xColPb,yColPb) ];
1294
1295
2.78k
  if (shdr->LongTermRefPic[X][refIdxLX] !=
1296
2.78k
      colShdr->LongTermRefPic[listCol][refIdxCol]) {
1297
15
    *out_availableFlagLXCol = 0;
1298
15
    out_mvLXCol->x = 0;
1299
15
    out_mvLXCol->y = 0;
1300
15
  }
1301
2.76k
  else {
1302
2.76k
    *out_availableFlagLXCol = 1;
1303
1304
2.76k
    const bool isLongTerm = shdr->LongTermRefPic[X][refIdxLX];
1305
1306
2.76k
    int colDist  = colImg->PicOrderCntVal - colShdr->RefPicList_POC[listCol][refIdxCol];
1307
2.76k
    int currDist = img->PicOrderCntVal - shdr->RefPicList_POC[X][refIdxLX];
1308
1309
2.76k
    logtrace(LogMotion,"COLPOCDIFF %d %d [%d %d / %d %d]\n",colDist, currDist,
1310
2.76k
             colImg->PicOrderCntVal, colShdr->RefPicList_POC[listCol][refIdxCol],
1311
2.76k
             img->PicOrderCntVal, shdr->RefPicList_POC[X][refIdxLX]
1312
2.76k
             );
1313
1314
2.76k
    if (isLongTerm || colDist == currDist) {
1315
2.72k
      *out_mvLXCol = mvCol;
1316
2.72k
    }
1317
42
    else {
1318
42
      if (!scale_mv(out_mvLXCol, mvCol, colDist, currDist)) {
1319
32
        ctx->add_warning(DE265_WARNING_INCORRECT_MOTION_VECTOR_SCALING, false);
1320
32
        img->integrity = INTEGRITY_DECODING_ERRORS;
1321
32
      }
1322
1323
42
      logtrace(LogMotion,"scale: %d;%d to %d;%d\n",
1324
42
               mvCol.x,mvCol.y, out_mvLXCol->x,out_mvLXCol->y);
1325
42
    }
1326
2.76k
  }
1327
2.78k
}
1328
1329
1330
// 8.5.3.1.7
1331
void derive_temporal_luma_vector_prediction(base_context* ctx,
1332
                                            de265_image* img,
1333
                                            const slice_segment_header* shdr,
1334
                                            int xP,int yP,
1335
                                            int nPbW,int nPbH,
1336
                                            int refIdxL,
1337
                                            int X, // which MV (L0/L1) to get
1338
                                            MotionVector* out_mvLXCol,
1339
                                            uint8_t*      out_availableFlagLXCol)
1340
167k
{
1341
  // --- no temporal MVP -> exit ---
1342
1343
167k
  if (shdr->slice_temporal_mvp_enabled_flag == 0) {
1344
141k
    out_mvLXCol->x = 0;
1345
141k
    out_mvLXCol->y = 0;
1346
141k
    *out_availableFlagLXCol = 0;
1347
141k
    return;
1348
141k
  }
1349
1350
1351
  // --- find collocated reference image ---
1352
1353
26.3k
  int Log2CtbSizeY = img->get_sps().Log2CtbSizeY;
1354
1355
26.3k
  int colPic; // TODO: this is the same for the whole slice. We can precompute it.
1356
1357
26.3k
  if (shdr->slice_type == SLICE_TYPE_B &&
1358
19.8k
      shdr->collocated_from_l0_flag == 0)
1359
7.57k
    {
1360
7.57k
      logtrace(LogMotion,"collocated L1 ref_idx=%d\n",shdr->collocated_ref_idx);
1361
1362
7.57k
      colPic = shdr->RefPicList[1][ shdr->collocated_ref_idx ];
1363
7.57k
    }
1364
18.8k
  else
1365
18.8k
    {
1366
18.8k
      logtrace(LogMotion,"collocated L0 ref_idx=%d\n",shdr->collocated_ref_idx);
1367
1368
18.8k
      colPic = shdr->RefPicList[0][ shdr->collocated_ref_idx ];
1369
18.8k
    }
1370
1371
1372
  // check whether collocated reference picture exists
1373
1374
26.3k
  if (!ctx->has_image(colPic)) {
1375
0
    out_mvLXCol->x = 0;
1376
0
    out_mvLXCol->y = 0;
1377
0
    *out_availableFlagLXCol = 0;
1378
1379
0
    ctx->add_warning(DE265_WARNING_NONEXISTING_REFERENCE_PICTURE_ACCESSED, false);
1380
0
    return;
1381
0
  }
1382
1383
1384
  // --- get collocated MV either at bottom-right corner or from center of PB ---
1385
1386
26.3k
  int xColPb,yColPb;
1387
26.3k
  int yColBr = yP + nPbH; // bottom right collocated motion vector position
1388
26.3k
  int xColBr = xP + nPbW;
1389
1390
  /* If neighboring pixel at bottom-right corner is in the same CTB-row and inside the image,
1391
     use this (reduced down to 16 pixels resolution) as collocated MV position.
1392
1393
     Note: see 2014, Sze, Sect. 5.2.1.2 why candidate C0 is excluded when on another CTB-row.
1394
     This is to reduce the memory bandwidth requirements.
1395
   */
1396
26.3k
  if ((yP>>Log2CtbSizeY) == (yColBr>>Log2CtbSizeY) &&
1397
16.8k
      xColBr < img->get_sps().pic_width_in_luma_samples &&
1398
15.4k
      yColBr < img->get_sps().pic_height_in_luma_samples)
1399
12.9k
    {
1400
12.9k
      xColPb = xColBr & ~0x0F; // reduce resolution of collocated motion-vectors to 16 pixels grid
1401
12.9k
      yColPb = yColBr & ~0x0F;
1402
1403
12.9k
      derive_collocated_motion_vectors(ctx,img,shdr, xP,yP, colPic, xColPb,yColPb, refIdxL, X,
1404
12.9k
                                       out_mvLXCol, out_availableFlagLXCol);
1405
12.9k
    }
1406
13.4k
  else
1407
13.4k
    {
1408
13.4k
      out_mvLXCol->x = 0;
1409
13.4k
      out_mvLXCol->y = 0;
1410
13.4k
      *out_availableFlagLXCol = 0;
1411
13.4k
    }
1412
1413
1414
26.3k
  if (*out_availableFlagLXCol==0) {
1415
1416
26.1k
    int xColCtr = xP+(nPbW>>1);
1417
26.1k
    int yColCtr = yP+(nPbH>>1);
1418
1419
26.1k
    xColPb = xColCtr & ~0x0F; // reduce resolution of collocated motion-vectors to 16 pixels grid
1420
26.1k
    yColPb = yColCtr & ~0x0F;
1421
1422
26.1k
    derive_collocated_motion_vectors(ctx,img,shdr, xP,yP, colPic, xColPb,yColPb, refIdxL, X,
1423
26.1k
                                     out_mvLXCol, out_availableFlagLXCol);
1424
26.1k
  }
1425
26.3k
}
1426
1427
1428
/* Candidate index pairs used when building combined bi-predictive merge candidates.
   Row 0 holds the index of the candidate supplying the L0 motion, row 1 the index of
   the candidate supplying the L1 motion; the column is combIdx.
   The table is only read, hence it is declared const. */
static const int table_8_19[2][12] = {
  { 0,1,0,2,1,2,0,3,1,3,2,3 },
  { 1,0,2,0,2,1,3,0,3,1,3,2 }
  };
1432
1433
// 8.5.3.1.3
1434
/* Note (TODO): during decoding, we know which of the candidates we will select.
1435
   Hence, we do not really have to generate the other ones...
1436
 */
1437
void derive_combined_bipredictive_merging_candidates(const base_context* ctx,
1438
                                                     const slice_segment_header* shdr,
1439
                                                     PBMotion* inout_mergeCandList,
1440
                                                     int* inout_numMergeCand,
1441
                                                     int maxCandidates)
1442
214k
{
1443
214k
  if (*inout_numMergeCand>1 && *inout_numMergeCand < maxCandidates) {
1444
7.30k
    int numOrigMergeCand = *inout_numMergeCand;
1445
1446
7.30k
    int numInputMergeCand = *inout_numMergeCand;
1447
7.30k
    int combIdx = 0;
1448
7.30k
    uint8_t combStop = false;
1449
1450
23.2k
    while (!combStop) {
1451
15.9k
      int l0CandIdx = table_8_19[0][combIdx];
1452
15.9k
      int l1CandIdx = table_8_19[1][combIdx];
1453
1454
15.9k
      if (l0CandIdx >= numInputMergeCand ||
1455
15.9k
          l1CandIdx >= numInputMergeCand) {
1456
0
        assert(false); // bitstream error -> TODO: conceal error
1457
0
      }
1458
1459
15.9k
      PBMotion& l0Cand = inout_mergeCandList[l0CandIdx];
1460
15.9k
      PBMotion& l1Cand = inout_mergeCandList[l1CandIdx];
1461
1462
15.9k
      logtrace(LogMotion,"add bipredictive merging candidate (combIdx:%d)\n",combIdx);
1463
15.9k
      logtrace(LogMotion,"l0Cand:\n"); logmvcand(l0Cand);
1464
15.9k
      logtrace(LogMotion,"l1Cand:\n"); logmvcand(l1Cand);
1465
1466
15.9k
      const de265_image* img0 = l0Cand.predFlag[0] ? ctx->get_image(shdr->RefPicList[0][l0Cand.refIdx[0]]) : nullptr;
1467
15.9k
      const de265_image* img1 = l1Cand.predFlag[1] ? ctx->get_image(shdr->RefPicList[1][l1Cand.refIdx[1]]) : nullptr;
1468
1469
15.9k
      if (l0Cand.predFlag[0] && !img0) {
1470
0
        return; // TODO error
1471
0
      }
1472
1473
15.9k
      if (l1Cand.predFlag[1] && !img1) {
1474
0
        return; // TODO error
1475
0
      }
1476
1477
15.9k
      if (l0Cand.predFlag[0] && l1Cand.predFlag[1] &&
1478
8.27k
          (img0->PicOrderCntVal != img1->PicOrderCntVal     ||
1479
6.66k
           l0Cand.mv[0].x != l1Cand.mv[1].x ||
1480
5.17k
           l0Cand.mv[0].y != l1Cand.mv[1].y)) {
1481
5.17k
        PBMotion& p = inout_mergeCandList[ *inout_numMergeCand ];
1482
5.17k
        p.refIdx[0] = l0Cand.refIdx[0];
1483
5.17k
        p.refIdx[1] = l1Cand.refIdx[1];
1484
5.17k
        p.predFlag[0] = l0Cand.predFlag[0];
1485
5.17k
        p.predFlag[1] = l1Cand.predFlag[1];
1486
5.17k
        p.mv[0] = l0Cand.mv[0];
1487
5.17k
        p.mv[1] = l1Cand.mv[1];
1488
5.17k
        (*inout_numMergeCand)++;
1489
1490
5.17k
        logtrace(LogMotion,"result:\n");
1491
5.17k
        logmvcand(p);
1492
5.17k
      }
1493
1494
15.9k
      combIdx++;
1495
15.9k
      if (combIdx == numOrigMergeCand*(numOrigMergeCand-1) ||
1496
11.0k
          *inout_numMergeCand == maxCandidates) {
1497
7.30k
        combStop = true;
1498
7.30k
      }
1499
15.9k
    }
1500
7.30k
  }
1501
214k
}
1502
1503
1504
// 8.5.3.1.1
1505
1506
void get_merge_candidate_list_without_step_9(base_context* ctx,
1507
                                             const slice_segment_header* shdr,
1508
                                             const MotionVectorAccess& mvaccess,
1509
                                             de265_image* img,
1510
                                             int xC,int yC, int xP,int yP,
1511
                                             int nCS, int nPbW,int nPbH, int partIdx,
1512
                                             int max_merge_idx,
1513
                                             PBMotion* mergeCandList)
1514
270k
{
1515
1516
  //int xOrigP = xP;
1517
  //int yOrigP = yP;
1518
  //int nOrigPbW = nPbW;
1519
  //int nOrigPbH = nPbH;
1520
1521
270k
  int singleMCLFlag; // single merge-candidate-list (MCL) flag
1522
1523
  /* Use single MCL for CBs of size 8x8, except when parallel-merge-level is at 4x4.
1524
     Without this flag, PBs smaller than 8x8 would not receive as much merging candidates.
1525
     Having additional candidates might have these advantages:
1526
     - coding MVs for these small PBs is expensive, and
1527
     - since the PBs are not far away from a proper (neighboring) merging candidate,
1528
     the quality of the candidates will still be good.
1529
  */
1530
270k
  singleMCLFlag = (img->get_pps().log2_parallel_merge_level > 2 && nCS==8);
1531
1532
270k
  if (singleMCLFlag) {
1533
43.5k
    xP=xC;
1534
43.5k
    yP=yC;
1535
43.5k
    nPbW=nCS;
1536
43.5k
    nPbH=nCS;
1537
43.5k
    partIdx=0;
1538
43.5k
  }
1539
1540
270k
  int maxCandidates = max_merge_idx+1;
1541
  //MotionVectorSpec mergeCandList[5];
1542
270k
  int numMergeCand=0;
1543
1544
  // --- spatial merge candidates
1545
1546
270k
  numMergeCand = derive_spatial_merging_candidates(mvaccess,
1547
270k
                                                   img, xC,yC, nCS, xP,yP, singleMCLFlag,
1548
270k
                                                   nPbW,nPbH,partIdx, mergeCandList,
1549
270k
                                                   maxCandidates);
1550
1551
  // --- collocated merge candidate
1552
270k
  if (numMergeCand < maxCandidates) {
1553
49.4k
    int refIdxCol[2] = { 0,0 };
1554
1555
49.4k
    MotionVector mvCol[2];
1556
49.4k
    uint8_t predFlagLCol[2];
1557
49.4k
    derive_temporal_luma_vector_prediction(ctx,img,shdr, xP,yP,nPbW,nPbH,
1558
49.4k
                                           refIdxCol[0],0, &mvCol[0],
1559
49.4k
                                           &predFlagLCol[0]);
1560
1561
49.4k
    uint8_t availableFlagCol = predFlagLCol[0];
1562
49.4k
    predFlagLCol[1] = 0;
1563
1564
49.4k
    if (shdr->slice_type == SLICE_TYPE_B) {
1565
29.8k
      derive_temporal_luma_vector_prediction(ctx,img,shdr,
1566
29.8k
                                             xP,yP,nPbW,nPbH, refIdxCol[1],1, &mvCol[1],
1567
29.8k
                                             &predFlagLCol[1]);
1568
29.8k
      availableFlagCol |= predFlagLCol[1];
1569
29.8k
    }
1570
1571
1572
49.4k
    if (availableFlagCol) {
1573
536
      PBMotion* colVec = &mergeCandList[numMergeCand++];
1574
1575
536
      colVec->mv[0] = mvCol[0];
1576
536
      colVec->mv[1] = mvCol[1];
1577
536
      colVec->predFlag[0] = predFlagLCol[0];
1578
536
      colVec->predFlag[1] = predFlagLCol[1];
1579
536
      colVec->refIdx[0] = refIdxCol[0];
1580
536
      colVec->refIdx[1] = refIdxCol[1];
1581
536
    }
1582
49.4k
  }
1583
1584
1585
  // --- bipredictive merge candidates ---
1586
1587
270k
  if (shdr->slice_type == SLICE_TYPE_B) {
1588
214k
    derive_combined_bipredictive_merging_candidates(ctx, shdr,
1589
214k
                                                    mergeCandList, &numMergeCand, maxCandidates);
1590
214k
  }
1591
1592
1593
  // --- zero-vector merge candidates ---
1594
1595
270k
  derive_zero_motion_vector_candidates(shdr, mergeCandList, &numMergeCand, maxCandidates);
1596
1597
1598
270k
  logtrace(LogMotion,"mergeCandList:\n");
1599
1.38M
  for (int i=0;i<shdr->MaxNumMergeCand;i++)
1600
1.11M
    {
1601
      //logtrace(LogMotion, " %d:%s\n", i, i==merge_idx ? " SELECTED":"");
1602
1.11M
      logmvcand(mergeCandList[i]);
1603
1.11M
    }
1604
270k
}
1605
1606
1607
1608
void get_merge_candidate_list(base_context* ctx,
1609
                              const slice_segment_header* shdr,
1610
                              de265_image* img,
1611
                              int xC,int yC, int xP,int yP,
1612
                              int nCS, int nPbW,int nPbH, int partIdx,
1613
                              PBMotion* mergeCandList)
1614
0
{
1615
0
  int max_merge_idx = 5-shdr->five_minus_max_num_merge_cand -1;
1616
1617
0
  get_merge_candidate_list_without_step_9(ctx, shdr,
1618
0
                                          MotionVectorAccess_de265_image(img), img,
1619
0
                                          xC,yC,xP,yP,nCS,nPbW,nPbH, partIdx,
1620
0
                                          max_merge_idx, mergeCandList);
1621
1622
  // 9. for encoder: modify all merge candidates
1623
1624
0
  for (int i=0;i<=max_merge_idx;i++) {
1625
0
    if (mergeCandList[i].predFlag[0] &&
1626
0
        mergeCandList[i].predFlag[1] &&
1627
0
        nPbW+nPbH==12)
1628
0
      {
1629
0
        mergeCandList[i].refIdx[1]   = 0;
1630
0
        mergeCandList[i].predFlag[1] = 0;
1631
0
      }
1632
0
  }
1633
0
}
1634
1635
1636
void derive_luma_motion_merge_mode(base_context* ctx,
1637
                                   const slice_segment_header* shdr,
1638
                                   de265_image* img,
1639
                                   int xC,int yC, int xP,int yP,
1640
                                   int nCS, int nPbW,int nPbH, int partIdx,
1641
                                   int merge_idx,
1642
                                   PBMotion* out_vi)
1643
270k
{
1644
270k
  PBMotion mergeCandList[5];
1645
1646
270k
  get_merge_candidate_list_without_step_9(ctx, shdr,
1647
270k
                                          MotionVectorAccess_de265_image(img), img,
1648
270k
                                          xC,yC,xP,yP,nCS,nPbW,nPbH, partIdx,
1649
270k
                                          merge_idx, mergeCandList);
1650
1651
1652
270k
  *out_vi = mergeCandList[merge_idx];
1653
1654
  // 8.5.3.1.1 / 9.
1655
1656
270k
  if (out_vi->predFlag[0] && out_vi->predFlag[1] && nPbW+nPbH==12) {
1657
17.1k
    out_vi->refIdx[1] = 0;
1658
17.1k
    out_vi->predFlag[1] = 0;
1659
17.1k
  }
1660
270k
}
1661
1662
1663
// 8.5.3.1.6
1664
void derive_spatial_luma_vector_prediction(base_context* ctx,
1665
                                           de265_image* img,
1666
                                           const slice_segment_header* shdr,
1667
                                           int xC,int yC,int nCS,int xP,int yP,
1668
                                           int nPbW,int nPbH, int X,
1669
                                           int refIdxLX, int partIdx,
1670
                                           uint8_t out_availableFlagLXN[2],
1671
                                           MotionVector out_mvLXN[2])
1672
182k
{
1673
182k
  if (refIdxLX >= MAX_NUM_REF_PICS) {
1674
0
    ctx->add_warning(DE265_WARNING_INCORRECT_MOTION_VECTOR_SCALING, false);
1675
0
    img->integrity = INTEGRITY_DECODING_ERRORS;
1676
1677
0
    out_availableFlagLXN[0] = false;
1678
0
    out_availableFlagLXN[1] = false;
1679
0
    out_mvLXN[0] = MotionVector();
1680
0
    out_mvLXN[1] = MotionVector();
1681
0
    return;
1682
0
  }
1683
1684
182k
  int isScaledFlagLX = 0;
1685
1686
182k
  const int A=0;
1687
182k
  const int B=1;
1688
1689
182k
  out_availableFlagLXN[A] = 0;
1690
182k
  out_availableFlagLXN[B] = 0;
1691
1692
1693
  // --- A ---
1694
1695
  // 1.
1696
1697
182k
  int xA[2], yA[2];
1698
182k
  xA[0] = xP-1;
1699
182k
  yA[0] = yP + nPbH;
1700
182k
  xA[1] = xA[0];
1701
182k
  yA[1] = yA[0]-1;
1702
1703
  // 2.
1704
1705
182k
  out_availableFlagLXN[A] = 0;
1706
182k
  out_mvLXN[A].x = 0;
1707
182k
  out_mvLXN[A].y = 0;
1708
1709
  // 3. / 4.
1710
1711
182k
  bool availableA[2];
1712
182k
  availableA[0] = img->available_pred_blk(xC,yC, nCS, xP,yP, nPbW,nPbH,partIdx, xA[0],yA[0]);
1713
182k
  availableA[1] = img->available_pred_blk(xC,yC, nCS, xP,yP, nPbW,nPbH,partIdx, xA[1],yA[1]);
1714
1715
  // 5.
1716
1717
182k
  if (availableA[0] || availableA[1]) {
1718
165k
    isScaledFlagLX = 1;
1719
165k
  }
1720
1721
  // 6.  test A0 and A1  (Ak)
1722
1723
182k
  int refIdxA=-1;
1724
1725
  // the POC we want to reference in this PB
1726
182k
  const de265_image* tmpimg = ctx->get_image(shdr->RefPicList[X][ refIdxLX ]);
1727
182k
  if (tmpimg==nullptr) { return; }
1728
182k
  const int referenced_POC = tmpimg->PicOrderCntVal;
1729
1730
547k
  for (int k=0;k<=1;k++) {
1731
1732
364k
    if (availableA[k] &&
1733
204k
        out_availableFlagLXN[A]==0 && // no A?-predictor so far
1734
167k
        img->get_pred_mode(xA[k],yA[k]) != MODE_INTRA) {
1735
1736
167k
      int Y=1-X;
1737
1738
167k
      const PBMotion& vi = img->get_mv_info(xA[k],yA[k]);
1739
167k
      logtrace(LogMotion,"MVP A%d=\n",k);
1740
167k
      logmvcand(vi);
1741
1742
167k
      const de265_image* imgX = nullptr;
1743
167k
      if (vi.predFlag[X]) {
1744
152k
        imgX = ctx->get_image(shdr->RefPicList[X][ vi.refIdx[X] ]);
1745
152k
      }
1746
1747
167k
      const de265_image* imgY = nullptr;
1748
167k
      if (vi.predFlag[Y]) {
1749
59.7k
        imgY = ctx->get_image(shdr->RefPicList[Y][ vi.refIdx[Y] ]);
1750
59.7k
      }
1751
1752
      // check whether the predictor X is available and references the same POC
1753
167k
      if (vi.predFlag[X] && imgX && imgX->PicOrderCntVal == referenced_POC) {
1754
1755
143k
        logtrace(LogMotion,"take A%d/L%d as A candidate with same POC\n",k,X);
1756
1757
143k
        out_availableFlagLXN[A]=1;
1758
143k
        out_mvLXN[A] = vi.mv[X];
1759
143k
        refIdxA = vi.refIdx[X];
1760
143k
      }
1761
      // check whether the other predictor (Y) is available and references the same POC
1762
23.6k
      else if (vi.predFlag[Y] && imgY && imgY->PicOrderCntVal == referenced_POC) {
1763
1764
12.0k
        logtrace(LogMotion,"take A%d/L%d as A candidate with same POC\n",k,Y);
1765
1766
12.0k
        out_availableFlagLXN[A]=1;
1767
12.0k
        out_mvLXN[A] = vi.mv[Y];
1768
12.0k
        refIdxA = vi.refIdx[Y];
1769
12.0k
      }
1770
167k
    }
1771
364k
  }
1772
1773
  // 7. If there is no predictor referencing the same POC, we take any other reference as
1774
  //    long as it is the same type of reference (long-term / short-term)
1775
1776
234k
  for (int k=0 ; k<=1 && out_availableFlagLXN[A]==0 ; k++) {
1777
51.6k
    int refPicList=-1;
1778
1779
51.6k
    if (availableA[k] &&
1780
        // TODO: we could remove this call by storing the result of the similar computation above
1781
9.94k
        img->get_pred_mode(xA[k],yA[k]) != MODE_INTRA) {
1782
1783
9.94k
      int Y=1-X;
1784
1785
9.94k
      const PBMotion& vi = img->get_mv_info(xA[k],yA[k]);
1786
9.94k
      if (vi.predFlag[X]==1 &&
1787
6.54k
          shdr->LongTermRefPic[X][refIdxLX] == shdr->LongTermRefPic[X][ vi.refIdx[X] ]) {
1788
1789
4.33k
        logtrace(LogMotion,"take A%D/L%d as A candidate with different POCs\n",k,X);
1790
1791
4.33k
        out_availableFlagLXN[A]=1;
1792
4.33k
        out_mvLXN[A] = vi.mv[X];
1793
4.33k
        refIdxA = vi.refIdx[X];
1794
4.33k
        refPicList = X;
1795
4.33k
      }
1796
5.60k
      else if (vi.predFlag[Y]==1 &&
1797
4.10k
               shdr->LongTermRefPic[X][refIdxLX] == shdr->LongTermRefPic[Y][ vi.refIdx[Y] ]) {
1798
1799
3.06k
        logtrace(LogMotion,"take A%d/L%d as A candidate with different POCs\n",k,Y);
1800
1801
3.06k
        out_availableFlagLXN[A]=1;
1802
3.06k
        out_mvLXN[A] = vi.mv[Y];
1803
3.06k
        refIdxA = vi.refIdx[Y];
1804
3.06k
        refPicList = Y;
1805
3.06k
      }
1806
9.94k
    }
1807
1808
51.6k
    if (out_availableFlagLXN[A]==1) {
1809
7.40k
      if (refIdxA<0) {
1810
0
        out_availableFlagLXN[0] = out_availableFlagLXN[1] = false;
1811
0
        return; // error
1812
0
      }
1813
1814
7.40k
      assert(refIdxA>=0);
1815
7.40k
      assert(refPicList>=0);
1816
1817
7.40k
      const de265_image* refPicA = ctx->get_image(shdr->RefPicList[refPicList][refIdxA ]);
1818
1819
#ifdef DE265_LOG_TRACE
1820
      const de265_image* refPicX = ctx->get_image(shdr->RefPicList[X][refIdxLX]);
1821
#endif
1822
1823
      //int picStateA = shdr->RefPicList_PicState[refPicList][refIdxA ];
1824
      //int picStateX = shdr->RefPicList_PicState[X         ][refIdxLX];
1825
1826
7.40k
      int isLongTermA = shdr->LongTermRefPic[refPicList][refIdxA ];
1827
7.40k
      int isLongTermX = shdr->LongTermRefPic[X         ][refIdxLX];
1828
1829
7.40k
      logtrace(LogMotion,"scale MVP A: A-POC:%d X-POC:%d\n",
1830
7.40k
               refPicA->PicOrderCntVal,refPicX->PicOrderCntVal);
1831
1832
7.40k
      if (!isLongTermA && !isLongTermX)
1833
      /*
1834
      if (picStateA == UsedForShortTermReference &&
1835
          picStateX == UsedForShortTermReference)
1836
      */
1837
4.53k
        {
1838
4.53k
          int distA = img->PicOrderCntVal - refPicA->PicOrderCntVal;
1839
4.53k
          int distX = img->PicOrderCntVal - referenced_POC;
1840
1841
4.53k
          if (!scale_mv(&out_mvLXN[A], out_mvLXN[A], distA, distX)) {
1842
0
            ctx->add_warning(DE265_WARNING_INCORRECT_MOTION_VECTOR_SCALING, false);
1843
0
            img->integrity = INTEGRITY_DECODING_ERRORS;
1844
0
          }
1845
4.53k
        }
1846
7.40k
    }
1847
51.6k
  }
1848
1849
  // --- B ---
1850
1851
  // 1.
1852
1853
182k
  int xB[3], yB[3];
1854
182k
  xB[0] = xP+nPbW;
1855
182k
  yB[0] = yP-1;
1856
182k
  xB[1] = xB[0]-1;
1857
182k
  yB[1] = yP-1;
1858
182k
  xB[2] = xP-1;
1859
182k
  yB[2] = yP-1;
1860
1861
  // 2.
1862
1863
182k
  out_availableFlagLXN[B] = 0;
1864
182k
  out_mvLXN[B].x = 0;
1865
182k
  out_mvLXN[B].y = 0;
1866
1867
  // 3. test B0,B1,B2 (Bk)
1868
1869
182k
  int refIdxB=-1;
1870
1871
182k
  bool availableB[3];
1872
729k
  for (int k=0;k<3;k++) {
1873
547k
    availableB[k] = img->available_pred_blk(xC,yC, nCS, xP,yP, nPbW,nPbH,partIdx, xB[k],yB[k]);
1874
1875
547k
    if (availableB[k] && out_availableFlagLXN[B]==0) {
1876
1877
178k
      int Y=1-X;
1878
1879
178k
      const PBMotion& vi = img->get_mv_info(xB[k],yB[k]);
1880
178k
      logtrace(LogMotion,"MVP B%d=\n",k);
1881
178k
      logmvcand(vi);
1882
1883
178k
      const de265_image* imgX = nullptr;
1884
178k
      if (vi.predFlag[X]) {
1885
157k
        imgX = ctx->get_image(shdr->RefPicList[X][ vi.refIdx[X] ]);
1886
157k
      }
1887
1888
178k
      const de265_image* imgY = nullptr;
1889
178k
      if (vi.predFlag[Y]) {
1890
72.7k
        imgY = ctx->get_image(shdr->RefPicList[Y][ vi.refIdx[Y] ]);
1891
72.7k
      }
1892
1893
178k
      if (vi.predFlag[X] && imgX && imgX->PicOrderCntVal == referenced_POC) {
1894
141k
        logtrace(LogMotion,"a) take B%d/L%d as B candidate with same POC\n",k,X);
1895
1896
141k
        out_availableFlagLXN[B]=1;
1897
141k
        out_mvLXN[B] = vi.mv[X];
1898
141k
        refIdxB = vi.refIdx[X];
1899
141k
      }
1900
36.9k
      else if (vi.predFlag[Y] && imgY && imgY->PicOrderCntVal == referenced_POC) {
1901
14.5k
        logtrace(LogMotion,"b) take B%d/L%d as B candidate with same POC\n",k,Y);
1902
1903
14.5k
        out_availableFlagLXN[B]=1;
1904
14.5k
        out_mvLXN[B] = vi.mv[Y];
1905
14.5k
        refIdxB = vi.refIdx[Y];
1906
14.5k
      }
1907
178k
    }
1908
547k
  }
1909
1910
  // 4.
1911
1912
182k
  if (isScaledFlagLX==0 &&      // no A predictor,
1913
16.9k
      out_availableFlagLXN[B])  // but an unscaled B predictor
1914
14.2k
    {
1915
      // use unscaled B predictor as A predictor
1916
1917
14.2k
      logtrace(LogMotion,"copy the same-POC B candidate as additional A candidate\n");
1918
1919
14.2k
      out_availableFlagLXN[A]=1;
1920
14.2k
      out_mvLXN[A] = out_mvLXN[B];
1921
14.2k
      refIdxA = refIdxB;
1922
14.2k
    }
1923
1924
  // 5.
1925
1926
  // If no A predictor, we output the unscaled B as the A predictor (above)
1927
  // and also add a scaled B predictor here.
1928
  // If there is (probably) an A predictor, no differing-POC B predictor is generated.
1929
182k
  if (isScaledFlagLX==0) {
1930
16.9k
    out_availableFlagLXN[B]=0;
1931
1932
41.6k
    for (int k=0 ; k<=2 && out_availableFlagLXN[B]==0 ; k++) {
1933
24.7k
      int refPicList=-1;
1934
1935
24.7k
      if (availableB[k]) {
1936
15.5k
        int Y=1-X;
1937
1938
15.5k
        const PBMotion& vi = img->get_mv_info(xB[k],yB[k]);
1939
1940
15.5k
        if (vi.predFlag[X]==1 &&
1941
13.7k
            shdr->LongTermRefPic[X][refIdxLX] == shdr->LongTermRefPic[X][ vi.refIdx[X] ]) {
1942
13.2k
          out_availableFlagLXN[B]=1;
1943
13.2k
          out_mvLXN[B] = vi.mv[X];
1944
13.2k
          refIdxB = vi.refIdx[X];
1945
13.2k
          refPicList = X;
1946
13.2k
        }
1947
2.28k
        else if (vi.predFlag[Y]==1 &&
1948
2.03k
                 shdr->LongTermRefPic[X][refIdxLX] == shdr->LongTermRefPic[Y][ vi.refIdx[Y] ]) {
1949
1.79k
          out_availableFlagLXN[B]=1;
1950
1.79k
          out_mvLXN[B] = vi.mv[Y];
1951
1.79k
          refIdxB = vi.refIdx[Y];
1952
1.79k
          refPicList = Y;
1953
1.79k
        }
1954
15.5k
      }
1955
1956
24.7k
      if (out_availableFlagLXN[B]==1) {
1957
15.0k
        if (refIdxB<0) {
1958
0
          out_availableFlagLXN[0] = out_availableFlagLXN[1] = false;
1959
0
          return; // error
1960
0
        }
1961
1962
15.0k
        assert(refPicList>=0);
1963
15.0k
        assert(refIdxB>=0);
1964
1965
15.0k
        const de265_image* refPicB=ctx->get_image(shdr->RefPicList[refPicList][refIdxB ]);
1966
15.0k
        const de265_image* refPicX=ctx->get_image(shdr->RefPicList[X         ][refIdxLX]);
1967
1968
15.0k
        int isLongTermB = shdr->LongTermRefPic[refPicList][refIdxB ];
1969
15.0k
        int isLongTermX = shdr->LongTermRefPic[X         ][refIdxLX];
1970
1971
15.0k
        if (refPicB==nullptr || refPicX==nullptr) {
1972
0
          img->decctx->add_warning(DE265_WARNING_NONEXISTING_REFERENCE_PICTURE_ACCESSED,false);
1973
0
          img->integrity = INTEGRITY_DECODING_ERRORS;
1974
0
        }
1975
15.0k
        else if (refPicB->PicOrderCntVal != refPicX->PicOrderCntVal &&
1976
1.16k
                 !isLongTermB && !isLongTermX) {
1977
831
          int distB = img->PicOrderCntVal - refPicB->PicOrderCntVal;
1978
831
          int distX = img->PicOrderCntVal - referenced_POC;
1979
1980
831
          logtrace(LogMotion,"scale MVP B: B-POC:%d X-POC:%d\n",refPicB->PicOrderCntVal,refPicX->PicOrderCntVal);
1981
1982
831
          if (!scale_mv(&out_mvLXN[B], out_mvLXN[B], distB, distX)) {
1983
0
            ctx->add_warning(DE265_WARNING_INCORRECT_MOTION_VECTOR_SCALING, false);
1984
0
            img->integrity = INTEGRITY_DECODING_ERRORS;
1985
0
          }
1986
831
        }
1987
15.0k
      }
1988
24.7k
    }
1989
16.9k
  }
1990
182k
}
1991
1992
1993
// 8.5.3.1.5
1994
void fill_luma_motion_vector_predictors(base_context* ctx,
1995
                                        const slice_segment_header* shdr,
1996
                                        de265_image* img,
1997
                                        int xC,int yC,int nCS,int xP,int yP,
1998
                                        int nPbW,int nPbH, int l,
1999
                                        int refIdx, int partIdx,
2000
                                        MotionVector out_mvpList[2])
2001
182k
{
2002
  // 8.5.3.1.6: derive two spatial vector predictors A (0) and B (1)
2003
2004
182k
  uint8_t availableFlagLXN[2];
2005
182k
  MotionVector mvLXN[2];
2006
2007
182k
  derive_spatial_luma_vector_prediction(ctx, img, shdr, xC,yC, nCS, xP,yP,
2008
182k
                                        nPbW,nPbH, l, refIdx, partIdx,
2009
182k
                                        availableFlagLXN, mvLXN);
2010
2011
  // 8.5.3.1.7: if we only have one spatial vector or both spatial vectors are the same,
2012
  // derive a temporal predictor
2013
2014
182k
  uint8_t availableFlagLXCol;
2015
182k
  MotionVector mvLXCol;
2016
2017
2018
182k
  if (availableFlagLXN[0] &&
2019
177k
      availableFlagLXN[1] &&
2020
155k
      (mvLXN[0].x != mvLXN[1].x || mvLXN[0].y != mvLXN[1].y)) {
2021
93.6k
    availableFlagLXCol = 0;
2022
93.6k
  }
2023
88.6k
  else {
2024
88.6k
    derive_temporal_luma_vector_prediction(ctx, img, shdr,
2025
88.6k
                                           xP,yP, nPbW,nPbH, refIdx,l,
2026
88.6k
                                           &mvLXCol, &availableFlagLXCol);
2027
88.6k
  }
2028
2029
2030
  // --- build candidate vector list with exactly two entries ---
2031
2032
182k
  int numMVPCandLX=0;
2033
2034
  // spatial predictor A
2035
2036
182k
  if (availableFlagLXN[0])
2037
177k
    {
2038
177k
      out_mvpList[numMVPCandLX++] = mvLXN[0];
2039
177k
    }
2040
2041
  // spatial predictor B (if not same as A)
2042
2043
182k
  if (availableFlagLXN[1] &&
2044
156k
      (!availableFlagLXN[0] || // in case A in not available, but mvLXA initialized to same as mvLXB
2045
155k
       (mvLXN[0].x != mvLXN[1].x || mvLXN[0].y != mvLXN[1].y)))
2046
95.4k
    {
2047
95.4k
      out_mvpList[numMVPCandLX++] = mvLXN[1];
2048
95.4k
    }
2049
2050
  // temporal predictor
2051
2052
182k
  if (availableFlagLXCol)
2053
1.72k
    {
2054
1.72k
      out_mvpList[numMVPCandLX++] = mvLXCol;
2055
1.72k
    }
2056
2057
  // fill with zero predictors
2058
2059
272k
  while (numMVPCandLX<2) {
2060
89.9k
    out_mvpList[numMVPCandLX].x = 0;
2061
89.9k
    out_mvpList[numMVPCandLX].y = 0;
2062
89.9k
    numMVPCandLX++;
2063
89.9k
  }
2064
2065
2066
182k
  assert(numMVPCandLX==2);
2067
182k
}
2068
2069
2070
// Returns the luma motion vector predictor for reference list 'l' of the
// prediction block at (xP,yP).
//
// The two-entry predictor list is built by fill_luma_motion_vector_predictors()
// and the entry is picked by motion.mvp_l0_flag (l==0) or motion.mvp_l1_flag
// (l!=0).
MotionVector luma_motion_vector_prediction(base_context* ctx,
                                           const slice_segment_header* shdr,
                                           de265_image* img,
                                           const PBMotionCoding& motion,
                                           int xC,int yC,int nCS,int xP,int yP,
                                           int nPbW,int nPbH, int l,
                                           int refIdx, int partIdx)
{
  MotionVector candidates[2];

  fill_luma_motion_vector_predictors(ctx, shdr, img,
                                     xC,yC,nCS,xP,yP,
                                     nPbW, nPbH, l, refIdx, partIdx,
                                     candidates);

  // choose the list entry signalled by the mvp flag of the requested list

  if (l) {
    return candidates[ motion.mvp_l1_flag ];
  }

  return candidates[ motion.mvp_l0_flag ];
}
2089
2090
2091
#if DE265_LOG_TRACE
// Writes one trace line describing the motion data of a prediction block:
// position, size, a mode label and, for each of the two lists, the prediction
// flag, the motion vector and the reference index.
// Vector and reference index are printed as 0 for a list whose prediction
// flag is not set; the stored values are not read in that case.
void logMV(int x0,int y0,int nPbW,int nPbH, const char* mode,const PBMotion* mv)
{
  const int used0 = mv->predFlag[0];
  const int used1 = mv->predFlag[1];

  int mvx0 = 0, mvy0 = 0, ref0 = 0;
  if (used0) {
    mvx0 = mv->mv[0].x;
    mvy0 = mv->mv[0].y;
    ref0 = mv->refIdx[0];
  }

  int mvx1 = 0, mvy1 = 0, ref1 = 0;
  if (used1) {
    mvx1 = mv->mv[1].x;
    mvy1 = mv->mv[1].y;
    ref1 = mv->refIdx[1];
  }

  logtrace(LogMotion,
           "*MV %d;%d [%d;%d] %s: (%d) %d;%d @%d   (%d) %d;%d @%d\n", x0,y0,nPbW,nPbH,mode,
           used0, mvx0,mvy0, ref0,
           used1, mvx1,mvy1, ref1);
}
#else
// Tracing disabled: logMV() expands to nothing.
#define logMV(x0,y0,nPbW,nPbH,mode,mv)
#endif
2107
2108
2109
2110
// 8.5.3.1
2111
void motion_vectors_and_ref_indices(base_context* ctx,
2112
                                    const slice_segment_header* shdr,
2113
                                    de265_image* img,
2114
                                    const PBMotionCoding& motion,
2115
                                    int xC,int yC, int xB,int yB, int nCS, int nPbW,int nPbH,
2116
                                    int partIdx,
2117
                                    PBMotion* out_vi)
2118
427k
{
2119
  //slice_segment_header* shdr = tctx->shdr;
2120
2121
427k
  int xP = xC+xB;
2122
427k
  int yP = yC+yB;
2123
2124
427k
  enum PredMode predMode = img->get_pred_mode(xC,yC);
2125
2126
427k
  if (predMode == MODE_SKIP ||
2127
281k
      (predMode == MODE_INTER && motion.merge_flag))
2128
270k
    {
2129
270k
      derive_luma_motion_merge_mode(ctx,shdr,img,
2130
270k
                                    xC,yC, xP,yP, nCS,nPbW,nPbH, partIdx,
2131
270k
                                    motion.merge_idx, out_vi);
2132
2133
270k
      logMV(xP,yP,nPbW,nPbH, "merge_mode", out_vi);
2134
270k
    }
2135
156k
  else {
2136
156k
    int mvdL[2][2];
2137
156k
    MotionVector mvpL[2];
2138
2139
469k
    for (int l=0;l<2;l++) {
2140
      // 1.
2141
2142
312k
      enum InterPredIdc inter_pred_idc = (enum InterPredIdc)motion.inter_pred_idc;
2143
2144
312k
      if (inter_pred_idc == PRED_BI ||
2145
260k
          (inter_pred_idc == PRED_L0 && l==0) ||
2146
182k
          (inter_pred_idc == PRED_L1 && l==1)) {
2147
182k
        out_vi->refIdx[l] = motion.refIdx[l];
2148
182k
        out_vi->predFlag[l] = 1;
2149
182k
      }
2150
130k
      else {
2151
130k
        out_vi->refIdx[l] = 0;
2152
130k
        out_vi->predFlag[l] = 0;
2153
130k
      }
2154
2155
      // 2.
2156
2157
312k
      mvdL[l][0] = motion.mvd[l][0];
2158
312k
      mvdL[l][1] = motion.mvd[l][1];
2159
2160
2161
312k
      if (out_vi->predFlag[l]) {
2162
        // 3.
2163
2164
182k
        mvpL[l] = luma_motion_vector_prediction(ctx,shdr,img,motion,
2165
182k
                                                xC,yC,nCS,xP,yP, nPbW,nPbH, l,
2166
182k
                                                out_vi->refIdx[l], partIdx);
2167
2168
        // 4.
2169
2170
182k
        int32_t x = (mvpL[l].x + mvdL[l][0] + 0x10000) & 0xFFFF;
2171
182k
        int32_t y = (mvpL[l].y + mvdL[l][1] + 0x10000) & 0xFFFF;
2172
2173
182k
        out_vi->mv[l].x = (x>=0x8000) ? x-0x10000 : x;
2174
182k
        out_vi->mv[l].y = (y>=0x8000) ? y-0x10000 : y;
2175
182k
      }
2176
312k
    }
2177
2178
156k
    logMV(xP,yP,nPbW,nPbH, "mvp", out_vi);
2179
156k
  }
2180
427k
}
2181
2182
2183
// 8.5.3
2184
2185
/* xC/yC : CB position
2186
   xB/yB : position offset of the PB
2187
   nPbW/nPbH : size of PB
2188
   nCS   : CB size
2189
 */
2190
void decode_prediction_unit(base_context* ctx,
2191
                            const slice_segment_header* shdr,
2192
                            de265_image* img,
2193
                            const PBMotionCoding& motion,
2194
                            int xC,int yC, int xB,int yB, int nCS, int nPbW,int nPbH, int partIdx)
2195
427k
{
2196
427k
  logtrace(LogMotion,"decode_prediction_unit POC=%d %d;%d %dx%d\n",
2197
427k
           img->PicOrderCntVal, xC+xB,yC+yB, nPbW,nPbH);
2198
2199
  //slice_segment_header* shdr = tctx->shdr;
2200
2201
  // 1.
2202
2203
427k
  PBMotion vi;
2204
427k
  motion_vectors_and_ref_indices(ctx, shdr, img, motion,
2205
427k
                                 xC,yC, xB,yB, nCS, nPbW,nPbH, partIdx, &vi);
2206
2207
  // 2.
2208
2209
427k
  generate_inter_prediction_samples(ctx,shdr, img, xC,yC, xB,yB, nCS, nPbW,nPbH, &vi);
2210
2211
2212
427k
  img->set_mv_info(xC+xB,yC+yB,nPbW,nPbH, vi);
2213
427k
}