Coverage Report

Created: 2025-08-11 08:01

/src/libde265/libde265/motion.cc
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * H.265 video codec.
3
 * Copyright (c) 2013-2014 struktur AG, Dirk Farin <farin@struktur.de>
4
 *
5
 * This file is part of libde265.
6
 *
7
 * libde265 is free software: you can redistribute it and/or modify
8
 * it under the terms of the GNU Lesser General Public License as
9
 * published by the Free Software Foundation, either version 3 of
10
 * the License, or (at your option) any later version.
11
 *
12
 * libde265 is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public License
18
 * along with libde265.  If not, see <http://www.gnu.org/licenses/>.
19
 */
20
21
#include "motion.h"
22
#include "decctx.h"
23
#include "util.h"
24
#include "dpb.h"
25
26
#include <assert.h>
27
28
29
#include <sys/types.h>
30
#include <signal.h>
31
#include <string.h>
32
33
#if defined(_MSC_VER) || defined(__MINGW32__)
34
# include <malloc.h>
35
#elif defined(HAVE_ALLOCA_H)
36
# include <alloca.h>
37
#endif
38
39
40
237M
#define MAX_CU_SIZE 64
41
42
43
static int extra_before[4] = { 0,3,3,2 };
44
static int extra_after [4] = { 0,3,4,4 };
45
46
47
48
template <class pixel_t>
49
void mc_luma(const base_context* ctx,
50
             const seq_parameter_set* sps, int mv_x, int mv_y,
51
             int xP,int yP,
52
             int16_t* out, int out_stride,
53
             const pixel_t* ref, int ref_stride,
54
             int nPbW, int nPbH, int bitDepth_L)
55
3.59M
{
56
3.59M
  int xFracL = mv_x & 3;
57
3.59M
  int yFracL = mv_y & 3;
58
59
3.59M
  int xIntOffsL = xP + (mv_x>>2);
60
3.59M
  int yIntOffsL = yP + (mv_y>>2);
61
62
  // luma sample interpolation process (8.5.3.2.2.1)
63
64
  //const int shift1 = sps->BitDepth_Y-8;
65
  //const int shift2 = 6;
66
3.59M
  const int shift3 = 14 - sps->BitDepth_Y;
67
68
3.59M
  int w = sps->pic_width_in_luma_samples;
69
3.59M
  int h = sps->pic_height_in_luma_samples;
70
71
3.59M
  ALIGNED_16(int16_t) mcbuffer[MAX_CU_SIZE * (MAX_CU_SIZE+7)];
72
73
3.59M
  if (xFracL==0 && yFracL==0) {
74
75
2.09M
    if (xIntOffsL >= 0 && yIntOffsL >= 0 &&
76
2.09M
        nPbW+xIntOffsL <= w && nPbH+yIntOffsL <= h) {
77
78
2.02M
      ctx->acceleration.put_hevc_qpel(out, out_stride,
79
2.02M
                                      &ref[yIntOffsL*ref_stride + xIntOffsL],
80
2.02M
                                      ref_stride /* sizeof(pixel_t)*/,
81
2.02M
                                      nPbW,nPbH, mcbuffer, 0,0, bitDepth_L);
82
2.02M
    }
83
74.0k
    else {
84
691k
      for (int y=0;y<nPbH;y++)
85
7.06M
        for (int x=0;x<nPbW;x++) {
86
87
6.45M
          int xA = Clip3(0,w-1,x + xIntOffsL);
88
6.45M
          int yA = Clip3(0,h-1,y + yIntOffsL);
89
90
6.45M
          out[y*out_stride+x] = ref[ xA + yA*ref_stride ] << shift3;
91
6.45M
        }
92
74.0k
    }
93
94
#ifdef DE265_LOG_TRACE
95
    logtrace(LogMotion,"---MC luma %d %d = direct---\n",xFracL,yFracL);
96
97
    for (int y=0;y<nPbH;y++) {
98
      for (int x=0;x<nPbW;x++) {
99
100
        int xA = Clip3(0,w-1,x + xIntOffsL);
101
        int yA = Clip3(0,h-1,y + yIntOffsL);
102
103
        logtrace(LogMotion,"%02x ", ref[ xA + yA*ref_stride ]);
104
      }
105
      logtrace(LogMotion,"\n");
106
    }
107
108
    logtrace(LogMotion," -> \n");
109
110
    for (int y=0;y<nPbH;y++) {
111
      for (int x=0;x<nPbW;x++) {
112
113
        logtrace(LogMotion,"%02x ",out[y*out_stride+x] >> 6); // 6 will be used when summing predictions
114
      }
115
      logtrace(LogMotion,"\n");
116
    }
117
#endif
118
2.09M
  }
119
1.49M
  else {
120
1.49M
    int extra_left   = extra_before[xFracL];
121
1.49M
    int extra_right  = extra_after [xFracL];
122
1.49M
    int extra_top    = extra_before[yFracL];
123
1.49M
    int extra_bottom = extra_after [yFracL];
124
125
    //int nPbW_extra = extra_left + nPbW + extra_right;
126
    //int nPbH_extra = extra_top  + nPbH + extra_bottom;
127
128
129
1.49M
    pixel_t padbuf[(MAX_CU_SIZE+16)*(MAX_CU_SIZE+7)];
130
131
1.49M
    const pixel_t* src_ptr;
132
1.49M
    int src_stride;
133
134
1.49M
    if (-extra_left + xIntOffsL >= 0 &&
135
1.49M
        -extra_top  + yIntOffsL >= 0 &&
136
1.49M
        nPbW+extra_right  + xIntOffsL < w &&
137
1.49M
        nPbH+extra_bottom + yIntOffsL < h) {
138
910k
      src_ptr = &ref[xIntOffsL + yIntOffsL*ref_stride];
139
910k
      src_stride = ref_stride;
140
910k
    }
141
582k
    else {
142
9.06M
      for (int y=-extra_top;y<nPbH+extra_bottom;y++) {
143
147M
        for (int x=-extra_left;x<nPbW+extra_right;x++) {
144
145
139M
          int xA = Clip3(0,w-1,x + xIntOffsL);
146
139M
          int yA = Clip3(0,h-1,y + yIntOffsL);
147
148
139M
          padbuf[x+extra_left + (y+extra_top)*(MAX_CU_SIZE+16)] = ref[ xA + yA*ref_stride ];
149
139M
        }
150
8.47M
      }
151
152
582k
      src_ptr = &padbuf[extra_top*(MAX_CU_SIZE+16) + extra_left];
153
582k
      src_stride = MAX_CU_SIZE+16;
154
582k
    }
155
156
1.49M
    ctx->acceleration.put_hevc_qpel(out, out_stride,
157
1.49M
                                    src_ptr, src_stride /* sizeof(pixel_t) */,
158
1.49M
                                    nPbW,nPbH, mcbuffer, xFracL,yFracL, bitDepth_L);
159
160
161
1.49M
    logtrace(LogMotion,"---V---\n");
162
15.4M
    for (int y=0;y<nPbH;y++) {
163
187M
      for (int x=0;x<nPbW;x++) {
164
174M
        logtrace(LogMotion,"%04x ",out[x+y*out_stride]);
165
174M
      }
166
13.9M
      logtrace(LogMotion,"\n");
167
13.9M
    }
168
1.49M
  }
169
3.59M
}
void mc_luma<unsigned short>(base_context const*, seq_parameter_set const*, int, int, int, int, short*, int, unsigned short const*, int, int, int, int)
Line
Count
Source
55
1.54M
{
56
1.54M
  int xFracL = mv_x & 3;
57
1.54M
  int yFracL = mv_y & 3;
58
59
1.54M
  int xIntOffsL = xP + (mv_x>>2);
60
1.54M
  int yIntOffsL = yP + (mv_y>>2);
61
62
  // luma sample interpolation process (8.5.3.2.2.1)
63
64
  //const int shift1 = sps->BitDepth_Y-8;
65
  //const int shift2 = 6;
66
1.54M
  const int shift3 = 14 - sps->BitDepth_Y;
67
68
1.54M
  int w = sps->pic_width_in_luma_samples;
69
1.54M
  int h = sps->pic_height_in_luma_samples;
70
71
1.54M
  ALIGNED_16(int16_t) mcbuffer[MAX_CU_SIZE * (MAX_CU_SIZE+7)];
72
73
1.54M
  if (xFracL==0 && yFracL==0) {
74
75
915k
    if (xIntOffsL >= 0 && yIntOffsL >= 0 &&
76
915k
        nPbW+xIntOffsL <= w && nPbH+yIntOffsL <= h) {
77
78
868k
      ctx->acceleration.put_hevc_qpel(out, out_stride,
79
868k
                                      &ref[yIntOffsL*ref_stride + xIntOffsL],
80
868k
                                      ref_stride /* sizeof(pixel_t)*/,
81
868k
                                      nPbW,nPbH, mcbuffer, 0,0, bitDepth_L);
82
868k
    }
83
46.7k
    else {
84
433k
      for (int y=0;y<nPbH;y++)
85
4.45M
        for (int x=0;x<nPbW;x++) {
86
87
4.07M
          int xA = Clip3(0,w-1,x + xIntOffsL);
88
4.07M
          int yA = Clip3(0,h-1,y + yIntOffsL);
89
90
4.07M
          out[y*out_stride+x] = ref[ xA + yA*ref_stride ] << shift3;
91
4.07M
        }
92
46.7k
    }
93
94
#ifdef DE265_LOG_TRACE
95
    logtrace(LogMotion,"---MC luma %d %d = direct---\n",xFracL,yFracL);
96
97
    for (int y=0;y<nPbH;y++) {
98
      for (int x=0;x<nPbW;x++) {
99
100
        int xA = Clip3(0,w-1,x + xIntOffsL);
101
        int yA = Clip3(0,h-1,y + yIntOffsL);
102
103
        logtrace(LogMotion,"%02x ", ref[ xA + yA*ref_stride ]);
104
      }
105
      logtrace(LogMotion,"\n");
106
    }
107
108
    logtrace(LogMotion," -> \n");
109
110
    for (int y=0;y<nPbH;y++) {
111
      for (int x=0;x<nPbW;x++) {
112
113
        logtrace(LogMotion,"%02x ",out[y*out_stride+x] >> 6); // 6 will be used when summing predictions
114
      }
115
      logtrace(LogMotion,"\n");
116
    }
117
#endif
118
915k
  }
119
625k
  else {
120
625k
    int extra_left   = extra_before[xFracL];
121
625k
    int extra_right  = extra_after [xFracL];
122
625k
    int extra_top    = extra_before[yFracL];
123
625k
    int extra_bottom = extra_after [yFracL];
124
125
    //int nPbW_extra = extra_left + nPbW + extra_right;
126
    //int nPbH_extra = extra_top  + nPbH + extra_bottom;
127
128
129
625k
    pixel_t padbuf[(MAX_CU_SIZE+16)*(MAX_CU_SIZE+7)];
130
131
625k
    const pixel_t* src_ptr;
132
625k
    int src_stride;
133
134
625k
    if (-extra_left + xIntOffsL >= 0 &&
135
625k
        -extra_top  + yIntOffsL >= 0 &&
136
625k
        nPbW+extra_right  + xIntOffsL < w &&
137
625k
        nPbH+extra_bottom + yIntOffsL < h) {
138
281k
      src_ptr = &ref[xIntOffsL + yIntOffsL*ref_stride];
139
281k
      src_stride = ref_stride;
140
281k
    }
141
343k
    else {
142
5.14M
      for (int y=-extra_top;y<nPbH+extra_bottom;y++) {
143
81.1M
        for (int x=-extra_left;x<nPbW+extra_right;x++) {
144
145
76.3M
          int xA = Clip3(0,w-1,x + xIntOffsL);
146
76.3M
          int yA = Clip3(0,h-1,y + yIntOffsL);
147
148
76.3M
          padbuf[x+extra_left + (y+extra_top)*(MAX_CU_SIZE+16)] = ref[ xA + yA*ref_stride ];
149
76.3M
        }
150
4.79M
      }
151
152
343k
      src_ptr = &padbuf[extra_top*(MAX_CU_SIZE+16) + extra_left];
153
343k
      src_stride = MAX_CU_SIZE+16;
154
343k
    }
155
156
625k
    ctx->acceleration.put_hevc_qpel(out, out_stride,
157
625k
                                    src_ptr, src_stride /* sizeof(pixel_t) */,
158
625k
                                    nPbW,nPbH, mcbuffer, xFracL,yFracL, bitDepth_L);
159
160
161
625k
    logtrace(LogMotion,"---V---\n");
162
6.40M
    for (int y=0;y<nPbH;y++) {
163
80.0M
      for (int x=0;x<nPbW;x++) {
164
74.3M
        logtrace(LogMotion,"%04x ",out[x+y*out_stride]);
165
74.3M
      }
166
5.77M
      logtrace(LogMotion,"\n");
167
5.77M
    }
168
625k
  }
169
1.54M
}
void mc_luma<unsigned char>(base_context const*, seq_parameter_set const*, int, int, int, int, short*, int, unsigned char const*, int, int, int, int)
Line
Count
Source
55
2.05M
{
56
2.05M
  int xFracL = mv_x & 3;
57
2.05M
  int yFracL = mv_y & 3;
58
59
2.05M
  int xIntOffsL = xP + (mv_x>>2);
60
2.05M
  int yIntOffsL = yP + (mv_y>>2);
61
62
  // luma sample interpolation process (8.5.3.2.2.1)
63
64
  //const int shift1 = sps->BitDepth_Y-8;
65
  //const int shift2 = 6;
66
2.05M
  const int shift3 = 14 - sps->BitDepth_Y;
67
68
2.05M
  int w = sps->pic_width_in_luma_samples;
69
2.05M
  int h = sps->pic_height_in_luma_samples;
70
71
2.05M
  ALIGNED_16(int16_t) mcbuffer[MAX_CU_SIZE * (MAX_CU_SIZE+7)];
72
73
2.05M
  if (xFracL==0 && yFracL==0) {
74
75
1.18M
    if (xIntOffsL >= 0 && yIntOffsL >= 0 &&
76
1.18M
        nPbW+xIntOffsL <= w && nPbH+yIntOffsL <= h) {
77
78
1.15M
      ctx->acceleration.put_hevc_qpel(out, out_stride,
79
1.15M
                                      &ref[yIntOffsL*ref_stride + xIntOffsL],
80
1.15M
                                      ref_stride /* sizeof(pixel_t)*/,
81
1.15M
                                      nPbW,nPbH, mcbuffer, 0,0, bitDepth_L);
82
1.15M
    }
83
27.2k
    else {
84
257k
      for (int y=0;y<nPbH;y++)
85
2.61M
        for (int x=0;x<nPbW;x++) {
86
87
2.38M
          int xA = Clip3(0,w-1,x + xIntOffsL);
88
2.38M
          int yA = Clip3(0,h-1,y + yIntOffsL);
89
90
2.38M
          out[y*out_stride+x] = ref[ xA + yA*ref_stride ] << shift3;
91
2.38M
        }
92
27.2k
    }
93
94
#ifdef DE265_LOG_TRACE
95
    logtrace(LogMotion,"---MC luma %d %d = direct---\n",xFracL,yFracL);
96
97
    for (int y=0;y<nPbH;y++) {
98
      for (int x=0;x<nPbW;x++) {
99
100
        int xA = Clip3(0,w-1,x + xIntOffsL);
101
        int yA = Clip3(0,h-1,y + yIntOffsL);
102
103
        logtrace(LogMotion,"%02x ", ref[ xA + yA*ref_stride ]);
104
      }
105
      logtrace(LogMotion,"\n");
106
    }
107
108
    logtrace(LogMotion," -> \n");
109
110
    for (int y=0;y<nPbH;y++) {
111
      for (int x=0;x<nPbW;x++) {
112
113
        logtrace(LogMotion,"%02x ",out[y*out_stride+x] >> 6); // 6 will be used when summing predictions
114
      }
115
      logtrace(LogMotion,"\n");
116
    }
117
#endif
118
1.18M
  }
119
867k
  else {
120
867k
    int extra_left   = extra_before[xFracL];
121
867k
    int extra_right  = extra_after [xFracL];
122
867k
    int extra_top    = extra_before[yFracL];
123
867k
    int extra_bottom = extra_after [yFracL];
124
125
    //int nPbW_extra = extra_left + nPbW + extra_right;
126
    //int nPbH_extra = extra_top  + nPbH + extra_bottom;
127
128
129
867k
    pixel_t padbuf[(MAX_CU_SIZE+16)*(MAX_CU_SIZE+7)];
130
131
867k
    const pixel_t* src_ptr;
132
867k
    int src_stride;
133
134
867k
    if (-extra_left + xIntOffsL >= 0 &&
135
867k
        -extra_top  + yIntOffsL >= 0 &&
136
867k
        nPbW+extra_right  + xIntOffsL < w &&
137
867k
        nPbH+extra_bottom + yIntOffsL < h) {
138
628k
      src_ptr = &ref[xIntOffsL + yIntOffsL*ref_stride];
139
628k
      src_stride = ref_stride;
140
628k
    }
141
238k
    else {
142
3.92M
      for (int y=-extra_top;y<nPbH+extra_bottom;y++) {
143
66.4M
        for (int x=-extra_left;x<nPbW+extra_right;x++) {
144
145
62.7M
          int xA = Clip3(0,w-1,x + xIntOffsL);
146
62.7M
          int yA = Clip3(0,h-1,y + yIntOffsL);
147
148
62.7M
          padbuf[x+extra_left + (y+extra_top)*(MAX_CU_SIZE+16)] = ref[ xA + yA*ref_stride ];
149
62.7M
        }
150
3.68M
      }
151
152
238k
      src_ptr = &padbuf[extra_top*(MAX_CU_SIZE+16) + extra_left];
153
238k
      src_stride = MAX_CU_SIZE+16;
154
238k
    }
155
156
867k
    ctx->acceleration.put_hevc_qpel(out, out_stride,
157
867k
                                    src_ptr, src_stride /* sizeof(pixel_t) */,
158
867k
                                    nPbW,nPbH, mcbuffer, xFracL,yFracL, bitDepth_L);
159
160
161
867k
    logtrace(LogMotion,"---V---\n");
162
9.00M
    for (int y=0;y<nPbH;y++) {
163
107M
      for (int x=0;x<nPbW;x++) {
164
99.7M
        logtrace(LogMotion,"%04x ",out[x+y*out_stride]);
165
99.7M
      }
166
8.13M
      logtrace(LogMotion,"\n");
167
8.13M
    }
168
867k
  }
169
2.05M
}
170
171
172
173
template <class pixel_t>
174
void mc_chroma(const base_context* ctx,
175
               const seq_parameter_set* sps,
176
               int mv_x, int mv_y,
177
               int xP,int yP,
178
               int16_t* out, int out_stride,
179
               const pixel_t* ref, int ref_stride,
180
               int nPbWC, int nPbHC, int bit_depth_C)
181
6.62M
{
182
  // chroma sample interpolation process (8.5.3.2.2.2)
183
184
  //const int shift1 = sps->BitDepth_C-8;
185
  //const int shift2 = 6;
186
6.62M
  const int shift3 = 14 - sps->BitDepth_C;
187
188
6.62M
  int wC = sps->pic_width_in_luma_samples /sps->SubWidthC;
189
6.62M
  int hC = sps->pic_height_in_luma_samples/sps->SubHeightC;
190
191
6.62M
  mv_x *= 2 / sps->SubWidthC;
192
6.62M
  mv_y *= 2 / sps->SubHeightC;
193
194
6.62M
  int xFracC = mv_x & 7;
195
6.62M
  int yFracC = mv_y & 7;
196
197
6.62M
  int xIntOffsC = xP/sps->SubWidthC  + (mv_x>>3);
198
6.62M
  int yIntOffsC = yP/sps->SubHeightC + (mv_y>>3);
199
200
6.62M
  ALIGNED_32(int16_t mcbuffer[MAX_CU_SIZE*(MAX_CU_SIZE+7)]);
201
202
6.62M
  if (xFracC == 0 && yFracC == 0) {
203
3.75M
    if (xIntOffsC>=0 && nPbWC+xIntOffsC<=wC &&
204
3.75M
        yIntOffsC>=0 && nPbHC+yIntOffsC<=hC) {
205
3.67M
      ctx->acceleration.put_hevc_epel(out, out_stride,
206
3.67M
                                      &ref[xIntOffsC + yIntOffsC*ref_stride], ref_stride,
207
3.67M
                                      nPbWC,nPbHC, 0,0, NULL, bit_depth_C);
208
3.67M
    }
209
73.4k
    else
210
73.4k
      {
211
474k
        for (int y=0;y<nPbHC;y++)
212
3.24M
          for (int x=0;x<nPbWC;x++) {
213
214
2.84M
            int xB = Clip3(0,wC-1,x + xIntOffsC);
215
2.84M
            int yB = Clip3(0,hC-1,y + yIntOffsC);
216
217
2.84M
            out[y*out_stride+x] = ref[ xB + yB*ref_stride ] << shift3;
218
2.84M
          }
219
73.4k
      }
220
3.75M
  }
221
2.87M
  else {
222
2.87M
    pixel_t padbuf[(MAX_CU_SIZE+16)*(MAX_CU_SIZE+3)];
223
224
2.87M
    const pixel_t* src_ptr;
225
2.87M
    int src_stride;
226
227
2.87M
    int extra_top  = 1;
228
2.87M
    int extra_left = 1;
229
2.87M
    int extra_right  = 2;
230
2.87M
    int extra_bottom = 2;
231
232
2.87M
    if (xIntOffsC>=1 && nPbWC+xIntOffsC<=wC-2 &&
233
2.87M
        yIntOffsC>=1 && nPbHC+yIntOffsC<=hC-2) {
234
1.81M
      src_ptr = &ref[xIntOffsC + yIntOffsC*ref_stride];
235
1.81M
      src_stride = ref_stride;
236
1.81M
    }
237
1.06M
    else {
238
10.2M
      for (int y=-extra_top;y<nPbHC+extra_bottom;y++) {
239
104M
        for (int x=-extra_left;x<nPbWC+extra_right;x++) {
240
241
95.4M
          int xA = Clip3(0,wC-1,x + xIntOffsC);
242
95.4M
          int yA = Clip3(0,hC-1,y + yIntOffsC);
243
244
95.4M
          padbuf[x+extra_left + (y+extra_top)*(MAX_CU_SIZE+16)] = ref[ xA + yA*ref_stride ];
245
95.4M
        }
246
9.22M
      }
247
248
1.06M
      src_ptr = &padbuf[extra_left + extra_top*(MAX_CU_SIZE+16)];
249
1.06M
      src_stride = MAX_CU_SIZE+16;
250
1.06M
    }
251
252
253
2.87M
    if (xFracC && yFracC) {
254
1.75M
      ctx->acceleration.put_hevc_epel_hv(out, out_stride,
255
1.75M
                                         src_ptr, src_stride,
256
1.75M
                                         nPbWC,nPbHC, xFracC,yFracC, mcbuffer, bit_depth_C);
257
1.75M
    }
258
1.12M
    else if (xFracC) {
259
529k
      ctx->acceleration.put_hevc_epel_h(out, out_stride,
260
529k
                                        src_ptr, src_stride,
261
529k
                                        nPbWC,nPbHC, xFracC,yFracC, mcbuffer, bit_depth_C);
262
529k
    }
263
594k
    else if (yFracC) {
264
594k
      ctx->acceleration.put_hevc_epel_v(out, out_stride,
265
594k
                                        src_ptr, src_stride,
266
594k
                                        nPbWC,nPbHC, xFracC,yFracC, mcbuffer, bit_depth_C);
267
594k
    }
268
0
    else {
269
0
      assert(false); // full-pel shifts are handled above
270
0
    }
271
2.87M
  }
272
6.62M
}
void mc_chroma<unsigned short>(base_context const*, seq_parameter_set const*, int, int, int, int, short*, int, unsigned short const*, int, int, int, int)
Line
Count
Source
181
2.53M
{
182
  // chroma sample interpolation process (8.5.3.2.2.2)
183
184
  //const int shift1 = sps->BitDepth_C-8;
185
  //const int shift2 = 6;
186
2.53M
  const int shift3 = 14 - sps->BitDepth_C;
187
188
2.53M
  int wC = sps->pic_width_in_luma_samples /sps->SubWidthC;
189
2.53M
  int hC = sps->pic_height_in_luma_samples/sps->SubHeightC;
190
191
2.53M
  mv_x *= 2 / sps->SubWidthC;
192
2.53M
  mv_y *= 2 / sps->SubHeightC;
193
194
2.53M
  int xFracC = mv_x & 7;
195
2.53M
  int yFracC = mv_y & 7;
196
197
2.53M
  int xIntOffsC = xP/sps->SubWidthC  + (mv_x>>3);
198
2.53M
  int yIntOffsC = yP/sps->SubHeightC + (mv_y>>3);
199
200
2.53M
  ALIGNED_32(int16_t mcbuffer[MAX_CU_SIZE*(MAX_CU_SIZE+7)]);
201
202
2.53M
  if (xFracC == 0 && yFracC == 0) {
203
1.36M
    if (xIntOffsC>=0 && nPbWC+xIntOffsC<=wC &&
204
1.36M
        yIntOffsC>=0 && nPbHC+yIntOffsC<=hC) {
205
1.32M
      ctx->acceleration.put_hevc_epel(out, out_stride,
206
1.32M
                                      &ref[xIntOffsC + yIntOffsC*ref_stride], ref_stride,
207
1.32M
                                      nPbWC,nPbHC, 0,0, NULL, bit_depth_C);
208
1.32M
    }
209
42.5k
    else
210
42.5k
      {
211
255k
        for (int y=0;y<nPbHC;y++)
212
1.47M
          for (int x=0;x<nPbWC;x++) {
213
214
1.26M
            int xB = Clip3(0,wC-1,x + xIntOffsC);
215
1.26M
            int yB = Clip3(0,hC-1,y + yIntOffsC);
216
217
1.26M
            out[y*out_stride+x] = ref[ xB + yB*ref_stride ] << shift3;
218
1.26M
          }
219
42.5k
      }
220
1.36M
  }
221
1.17M
  else {
222
1.17M
    pixel_t padbuf[(MAX_CU_SIZE+16)*(MAX_CU_SIZE+3)];
223
224
1.17M
    const pixel_t* src_ptr;
225
1.17M
    int src_stride;
226
227
1.17M
    int extra_top  = 1;
228
1.17M
    int extra_left = 1;
229
1.17M
    int extra_right  = 2;
230
1.17M
    int extra_bottom = 2;
231
232
1.17M
    if (xIntOffsC>=1 && nPbWC+xIntOffsC<=wC-2 &&
233
1.17M
        yIntOffsC>=1 && nPbHC+yIntOffsC<=hC-2) {
234
611k
      src_ptr = &ref[xIntOffsC + yIntOffsC*ref_stride];
235
611k
      src_stride = ref_stride;
236
611k
    }
237
561k
    else {
238
4.95M
      for (int y=-extra_top;y<nPbHC+extra_bottom;y++) {
239
43.6M
        for (int x=-extra_left;x<nPbWC+extra_right;x++) {
240
241
39.2M
          int xA = Clip3(0,wC-1,x + xIntOffsC);
242
39.2M
          int yA = Clip3(0,hC-1,y + yIntOffsC);
243
244
39.2M
          padbuf[x+extra_left + (y+extra_top)*(MAX_CU_SIZE+16)] = ref[ xA + yA*ref_stride ];
245
39.2M
        }
246
4.39M
      }
247
248
561k
      src_ptr = &padbuf[extra_left + extra_top*(MAX_CU_SIZE+16)];
249
561k
      src_stride = MAX_CU_SIZE+16;
250
561k
    }
251
252
253
1.17M
    if (xFracC && yFracC) {
254
790k
      ctx->acceleration.put_hevc_epel_hv(out, out_stride,
255
790k
                                         src_ptr, src_stride,
256
790k
                                         nPbWC,nPbHC, xFracC,yFracC, mcbuffer, bit_depth_C);
257
790k
    }
258
382k
    else if (xFracC) {
259
169k
      ctx->acceleration.put_hevc_epel_h(out, out_stride,
260
169k
                                        src_ptr, src_stride,
261
169k
                                        nPbWC,nPbHC, xFracC,yFracC, mcbuffer, bit_depth_C);
262
169k
    }
263
212k
    else if (yFracC) {
264
212k
      ctx->acceleration.put_hevc_epel_v(out, out_stride,
265
212k
                                        src_ptr, src_stride,
266
212k
                                        nPbWC,nPbHC, xFracC,yFracC, mcbuffer, bit_depth_C);
267
212k
    }
268
0
    else {
269
0
      assert(false); // full-pel shifts are handled above
270
0
    }
271
1.17M
  }
272
2.53M
}
void mc_chroma<unsigned char>(base_context const*, seq_parameter_set const*, int, int, int, int, short*, int, unsigned char const*, int, int, int, int)
Line
Count
Source
181
4.09M
{
182
  // chroma sample interpolation process (8.5.3.2.2.2)
183
184
  //const int shift1 = sps->BitDepth_C-8;
185
  //const int shift2 = 6;
186
4.09M
  const int shift3 = 14 - sps->BitDepth_C;
187
188
4.09M
  int wC = sps->pic_width_in_luma_samples /sps->SubWidthC;
189
4.09M
  int hC = sps->pic_height_in_luma_samples/sps->SubHeightC;
190
191
4.09M
  mv_x *= 2 / sps->SubWidthC;
192
4.09M
  mv_y *= 2 / sps->SubHeightC;
193
194
4.09M
  int xFracC = mv_x & 7;
195
4.09M
  int yFracC = mv_y & 7;
196
197
4.09M
  int xIntOffsC = xP/sps->SubWidthC  + (mv_x>>3);
198
4.09M
  int yIntOffsC = yP/sps->SubHeightC + (mv_y>>3);
199
200
4.09M
  ALIGNED_32(int16_t mcbuffer[MAX_CU_SIZE*(MAX_CU_SIZE+7)]);
201
202
4.09M
  if (xFracC == 0 && yFracC == 0) {
203
2.38M
    if (xIntOffsC>=0 && nPbWC+xIntOffsC<=wC &&
204
2.38M
        yIntOffsC>=0 && nPbHC+yIntOffsC<=hC) {
205
2.35M
      ctx->acceleration.put_hevc_epel(out, out_stride,
206
2.35M
                                      &ref[xIntOffsC + yIntOffsC*ref_stride], ref_stride,
207
2.35M
                                      nPbWC,nPbHC, 0,0, NULL, bit_depth_C);
208
2.35M
    }
209
30.8k
    else
210
30.8k
      {
211
218k
        for (int y=0;y<nPbHC;y++)
212
1.76M
          for (int x=0;x<nPbWC;x++) {
213
214
1.58M
            int xB = Clip3(0,wC-1,x + xIntOffsC);
215
1.58M
            int yB = Clip3(0,hC-1,y + yIntOffsC);
216
217
1.58M
            out[y*out_stride+x] = ref[ xB + yB*ref_stride ] << shift3;
218
1.58M
          }
219
30.8k
      }
220
2.38M
  }
221
1.70M
  else {
222
1.70M
    pixel_t padbuf[(MAX_CU_SIZE+16)*(MAX_CU_SIZE+3)];
223
224
1.70M
    const pixel_t* src_ptr;
225
1.70M
    int src_stride;
226
227
1.70M
    int extra_top  = 1;
228
1.70M
    int extra_left = 1;
229
1.70M
    int extra_right  = 2;
230
1.70M
    int extra_bottom = 2;
231
232
1.70M
    if (xIntOffsC>=1 && nPbWC+xIntOffsC<=wC-2 &&
233
1.70M
        yIntOffsC>=1 && nPbHC+yIntOffsC<=hC-2) {
234
1.19M
      src_ptr = &ref[xIntOffsC + yIntOffsC*ref_stride];
235
1.19M
      src_stride = ref_stride;
236
1.19M
    }
237
507k
    else {
238
5.34M
      for (int y=-extra_top;y<nPbHC+extra_bottom;y++) {
239
61.0M
        for (int x=-extra_left;x<nPbWC+extra_right;x++) {
240
241
56.2M
          int xA = Clip3(0,wC-1,x + xIntOffsC);
242
56.2M
          int yA = Clip3(0,hC-1,y + yIntOffsC);
243
244
56.2M
          padbuf[x+extra_left + (y+extra_top)*(MAX_CU_SIZE+16)] = ref[ xA + yA*ref_stride ];
245
56.2M
        }
246
4.83M
      }
247
248
507k
      src_ptr = &padbuf[extra_left + extra_top*(MAX_CU_SIZE+16)];
249
507k
      src_stride = MAX_CU_SIZE+16;
250
507k
    }
251
252
253
1.70M
    if (xFracC && yFracC) {
254
964k
      ctx->acceleration.put_hevc_epel_hv(out, out_stride,
255
964k
                                         src_ptr, src_stride,
256
964k
                                         nPbWC,nPbHC, xFracC,yFracC, mcbuffer, bit_depth_C);
257
964k
    }
258
741k
    else if (xFracC) {
259
359k
      ctx->acceleration.put_hevc_epel_h(out, out_stride,
260
359k
                                        src_ptr, src_stride,
261
359k
                                        nPbWC,nPbHC, xFracC,yFracC, mcbuffer, bit_depth_C);
262
359k
    }
263
381k
    else if (yFracC) {
264
381k
      ctx->acceleration.put_hevc_epel_v(out, out_stride,
265
381k
                                        src_ptr, src_stride,
266
381k
                                        nPbWC,nPbHC, xFracC,yFracC, mcbuffer, bit_depth_C);
267
381k
    }
268
0
    else {
269
0
      assert(false); // full-pel shifts are handled above
270
0
    }
271
1.70M
  }
272
4.09M
}
273
274
275
276
// 8.5.3.2
277
// NOTE: for full-pel shifts, we can introduce a fast path, simply copying without shifts
278
void generate_inter_prediction_samples(base_context* ctx,
279
                                       const slice_segment_header* shdr,
280
                                       de265_image* img,
281
                                       int xC,int yC,
282
                                       int xB,int yB,
283
                                       int nCS, int nPbW,int nPbH,
284
                                       const PBMotion* vi)
285
2.76M
{
286
2.76M
  int xP = xC+xB;
287
2.76M
  int yP = yC+yB;
288
289
2.76M
  void*  pixels[3];
290
2.76M
  int    stride[3];
291
292
2.76M
  const pic_parameter_set* pps = shdr->pps.get();
293
2.76M
  const seq_parameter_set* sps = pps->sps.get();
294
295
2.76M
  if (sps->BitDepth_Y != img->get_bit_depth(0) ||
296
2.76M
      sps->BitDepth_C != img->get_bit_depth(1)) {
297
0
    img->integrity = INTEGRITY_DECODING_ERRORS;
298
0
    ctx->add_warning(DE265_WARNING_BIT_DEPTH_OF_CURRENT_IMAGE_DOES_NOT_MATCH_SPS, false);
299
0
    return;
300
0
  }
301
302
2.76M
  if (sps->chroma_format_idc != img->get_chroma_format()) {
303
0
    img->integrity = INTEGRITY_DECODING_ERRORS;
304
0
    ctx->add_warning(DE265_WARNING_CHROMA_OF_CURRENT_IMAGE_DOES_NOT_MATCH_SPS, false);
305
0
    return;
306
0
  }
307
308
2.76M
  const int SubWidthC  = sps->SubWidthC;
309
2.76M
  const int SubHeightC = sps->SubHeightC;
310
311
2.76M
  pixels[0] = img->get_image_plane_at_pos_any_depth(0,xP,yP);
312
2.76M
  stride[0] = img->get_image_stride(0);
313
314
2.76M
  pixels[1] = img->get_image_plane_at_pos_any_depth(1,xP/SubWidthC,yP/SubHeightC);
315
2.76M
  stride[1] = img->get_image_stride(1);
316
317
2.76M
  pixels[2] = img->get_image_plane_at_pos_any_depth(2,xP/SubWidthC,yP/SubHeightC);
318
2.76M
  stride[2] = img->get_image_stride(2);
319
320
321
2.76M
  ALIGNED_16(int16_t) predSamplesL                 [2 /* LX */][MAX_CU_SIZE* MAX_CU_SIZE];
322
2.76M
  ALIGNED_16(int16_t) predSamplesC[2 /* chroma */ ][2 /* LX */][MAX_CU_SIZE* MAX_CU_SIZE];
323
324
  //int xP = xC+xB;
325
  //int yP = yC+yB;
326
327
2.76M
  int predFlag[2];
328
2.76M
  predFlag[0] = vi->predFlag[0];
329
2.76M
  predFlag[1] = vi->predFlag[1];
330
331
2.76M
  const int bit_depth_L = sps->BitDepth_Y;
332
2.76M
  const int bit_depth_C = sps->BitDepth_C;
333
334
  // Some encoders use bi-prediction with two similar MVs.
335
  // Identify this case and use only one MV.
336
337
  // do this only without weighted prediction, because the weights/offsets may be different
338
2.76M
  if (pps->weighted_pred_flag==0) {
339
812k
    if (predFlag[0] && predFlag[1]) {
340
317k
      if (vi->mv[0].x == vi->mv[1].x &&
341
317k
          vi->mv[0].y == vi->mv[1].y &&
342
317k
          shdr->RefPicList[0][vi->refIdx[0]] ==
343
238k
          shdr->RefPicList[1][vi->refIdx[1]]) {
344
178k
        predFlag[1] = 0;
345
178k
      }
346
317k
    }
347
812k
  }
348
349
350
8.28M
  for (int l=0;l<2;l++) {
351
5.52M
    if (predFlag[l]) {
352
      // 8.5.3.2.1
353
354
3.59M
      if (vi->refIdx[l] >= MAX_NUM_REF_PICS) {
355
0
        img->integrity = INTEGRITY_DECODING_ERRORS;
356
0
        ctx->add_warning(DE265_WARNING_NONEXISTING_REFERENCE_PICTURE_ACCESSED, false);
357
0
        return;
358
0
      }
359
360
3.59M
      const de265_image* refPic = ctx->get_image(shdr->RefPicList[l][vi->refIdx[l]]);
361
362
3.59M
      logtrace(LogMotion, "refIdx: %d -> dpb[%d]\n", vi->refIdx[l], shdr->RefPicList[l][vi->refIdx[l]]);
363
364
3.59M
      if (!refPic || refPic->PicState == UnusedForReference) {
365
0
        img->integrity = INTEGRITY_DECODING_ERRORS;
366
0
        ctx->add_warning(DE265_WARNING_NONEXISTING_REFERENCE_PICTURE_ACCESSED, false);
367
368
        // TODO: fill predSamplesC with black or grey
369
0
      }
370
3.59M
      else if (refPic->get_width(0) != sps->pic_width_in_luma_samples ||
371
3.59M
               refPic->get_height(0) != sps->pic_height_in_luma_samples ||
372
3.59M
               img->get_chroma_format() != refPic->get_chroma_format()) {
373
0
        img->integrity = INTEGRITY_DECODING_ERRORS;
374
0
        ctx->add_warning(DE265_WARNING_REFERENCE_IMAGE_SIZE_DOES_NOT_MATCH_SPS, false);
375
0
      }
376
3.59M
      else if (img->get_bit_depth(0) != refPic->get_bit_depth(0) ||
377
3.59M
               img->get_bit_depth(1) != refPic->get_bit_depth(1)) {
378
0
        img->integrity = INTEGRITY_DECODING_ERRORS;
379
0
        ctx->add_warning(DE265_WARNING_REFERENCE_IMAGE_BIT_DEPTH_DOES_NOT_MATCH, false);
380
0
      }
381
3.59M
      else if (img->get_chroma_format() != refPic->get_chroma_format()) {
382
0
        img->integrity = INTEGRITY_DECODING_ERRORS;
383
0
        ctx->add_warning(DE265_WARNING_REFERENCE_IMAGE_CHROMA_FORMAT_DOES_NOT_MATCH, false);
384
0
      }
385
3.59M
      else {
386
        // 8.5.3.2.2
387
388
3.59M
        logtrace(LogMotion,"do MC: L%d,MV=%d;%d RefPOC=%d\n",
389
3.59M
                 l,vi->mv[l].x,vi->mv[l].y,refPic->PicOrderCntVal);
390
391
392
        // TODO: must predSamples stride really be nCS or can it be something smaller like nPbW?
393
394
3.59M
        if (img->high_bit_depth(0)) {
395
1.54M
          mc_luma(ctx, sps, vi->mv[l].x, vi->mv[l].y, xP,yP,
396
1.54M
                  predSamplesL[l],nCS,
397
1.54M
                  (const uint16_t*)refPic->get_image_plane(0),
398
1.54M
                  refPic->get_luma_stride(), nPbW,nPbH, bit_depth_L);
399
1.54M
        }
400
2.05M
        else {
401
2.05M
          mc_luma(ctx, sps, vi->mv[l].x, vi->mv[l].y, xP,yP,
402
2.05M
                  predSamplesL[l],nCS,
403
2.05M
                  (const uint8_t*)refPic->get_image_plane(0),
404
2.05M
                  refPic->get_luma_stride(), nPbW,nPbH, bit_depth_L);
405
2.05M
        }
406
407
3.59M
        if (img->get_chroma_format() != de265_chroma_mono) {
408
3.31M
          if (img->high_bit_depth(1)) {
409
1.26M
            mc_chroma(ctx, sps, vi->mv[l].x, vi->mv[l].y, xP, yP,
410
1.26M
                      predSamplesC[0][l], nCS, (const uint16_t*) refPic->get_image_plane(1),
411
1.26M
                      refPic->get_chroma_stride(), nPbW / SubWidthC, nPbH / SubHeightC, bit_depth_C);
412
1.26M
            mc_chroma(ctx, sps, vi->mv[l].x, vi->mv[l].y, xP, yP,
413
1.26M
                      predSamplesC[1][l], nCS, (const uint16_t*) refPic->get_image_plane(2),
414
1.26M
                      refPic->get_chroma_stride(), nPbW / SubWidthC, nPbH / SubHeightC, bit_depth_C);
415
1.26M
          }
416
2.04M
          else {
417
2.04M
            mc_chroma(ctx, sps, vi->mv[l].x, vi->mv[l].y, xP, yP,
418
2.04M
                      predSamplesC[0][l], nCS, (const uint8_t*) refPic->get_image_plane(1),
419
2.04M
                      refPic->get_chroma_stride(), nPbW / SubWidthC, nPbH / SubHeightC, bit_depth_C);
420
2.04M
            mc_chroma(ctx, sps, vi->mv[l].x, vi->mv[l].y, xP, yP,
421
2.04M
                      predSamplesC[1][l], nCS, (const uint8_t*) refPic->get_image_plane(2),
422
2.04M
                      refPic->get_chroma_stride(), nPbW / SubWidthC, nPbH / SubHeightC, bit_depth_C);
423
2.04M
          }
424
3.31M
        }
425
3.59M
      }
426
3.59M
    }
427
5.52M
  }
428
429
430
  // weighted sample prediction  (8.5.3.2.3)
431
432
2.76M
  const int shift1_L = libde265_max(2,14-sps->BitDepth_Y);
433
2.76M
  const int offset_shift1_L = img->get_sps().WpOffsetBdShiftY;
434
2.76M
  const int shift1_C = libde265_max(2,14-sps->BitDepth_C);
435
2.76M
  const int offset_shift1_C = img->get_sps().WpOffsetBdShiftC;
436
437
  /*
438
  const int shift1_L = 14-img->sps.BitDepth_Y;
439
  const int offset_shift1_L = img->sps.BitDepth_Y-8;
440
  const int shift1_C = 14-img->sps.BitDepth_C;
441
  const int offset_shift1_C = img->sps.BitDepth_C-8;
442
  */
443
444
  /*
445
  if (0)
446
  printf("%d/%d %d/%d %d/%d %d/%d\n",
447
         shift1_L,
448
         Nshift1_L,
449
         offset_shift1_L,
450
         Noffset_shift1_L,
451
         shift1_C,
452
         Nshift1_C,
453
         offset_shift1_C,
454
         Noffset_shift1_C);
455
456
  assert(shift1_L==
457
         Nshift1_L);
458
  assert(offset_shift1_L==
459
         Noffset_shift1_L);
460
  assert(shift1_C==
461
         Nshift1_C);
462
  assert(offset_shift1_C==
463
         Noffset_shift1_C);
464
  */
465
466
467
2.76M
  logtrace(LogMotion,"predFlags (modified): %d %d\n", predFlag[0], predFlag[1]);
468
469
2.76M
  if (shdr->slice_type == SLICE_TYPE_P) {
470
298k
    if (pps->weighted_pred_flag==0) {
471
143k
      if (predFlag[0]==1 && predFlag[1]==0) {
472
143k
        ctx->acceleration.put_unweighted_pred(pixels[0], stride[0],
473
143k
                                              predSamplesL[0],nCS, nPbW,nPbH, bit_depth_L);
474
475
143k
        if (img->get_chroma_format() != de265_chroma_mono) {
476
122k
          ctx->acceleration.put_unweighted_pred(pixels[1], stride[1],
477
122k
                                                predSamplesC[0][0], nCS,
478
122k
                                                nPbW / SubWidthC, nPbH / SubHeightC, bit_depth_C);
479
122k
          ctx->acceleration.put_unweighted_pred(pixels[2], stride[2],
480
122k
                                                predSamplesC[1][0], nCS,
481
122k
                                                nPbW / SubWidthC, nPbH / SubHeightC, bit_depth_C);
482
122k
        }
483
143k
      }
484
0
      else {
485
0
        ctx->add_warning(DE265_WARNING_BOTH_PREDFLAGS_ZERO, false);
486
0
        img->integrity = INTEGRITY_DECODING_ERRORS;
487
0
      }
488
143k
    }
489
154k
    else {
490
      // weighted prediction
491
492
154k
      if (predFlag[0]==1 && predFlag[1]==0) {
493
494
154k
        int refIdx0 = vi->refIdx[0];
495
496
154k
        int luma_log2WD   = shdr->luma_log2_weight_denom + shift1_L;
497
154k
        int chroma_log2WD = shdr->ChromaLog2WeightDenom  + shift1_C;
498
499
154k
        int luma_w0 = shdr->LumaWeight[0][refIdx0];
500
154k
        int luma_o0 = shdr->luma_offset[0][refIdx0] * (1<<(offset_shift1_L));
501
502
154k
        int chroma0_w0 = shdr->ChromaWeight[0][refIdx0][0];
503
154k
        int chroma0_o0 = shdr->ChromaOffset[0][refIdx0][0] * (1<<(offset_shift1_C));
504
154k
        int chroma1_w0 = shdr->ChromaWeight[0][refIdx0][1];
505
154k
        int chroma1_o0 = shdr->ChromaOffset[0][refIdx0][1] * (1<<(offset_shift1_C));
506
507
154k
        logtrace(LogMotion,"weighted-0 [%d] %d %d %d  %dx%d\n", refIdx0, luma_log2WD-6,luma_w0,luma_o0,nPbW,nPbH);
508
509
154k
        ctx->acceleration.put_weighted_pred(pixels[0], stride[0],
510
154k
                                            predSamplesL[0],nCS, nPbW,nPbH,
511
154k
                                            luma_w0, luma_o0, luma_log2WD, bit_depth_L);
512
154k
        if (img->get_chroma_format() != de265_chroma_mono) {
513
130k
          ctx->acceleration.put_weighted_pred(pixels[1], stride[1],
514
130k
                                              predSamplesC[0][0], nCS, nPbW / SubWidthC, nPbH / SubHeightC,
515
130k
                                              chroma0_w0, chroma0_o0, chroma_log2WD, bit_depth_C);
516
130k
          ctx->acceleration.put_weighted_pred(pixels[2], stride[2],
517
130k
                                              predSamplesC[1][0], nCS, nPbW / SubWidthC, nPbH / SubHeightC,
518
130k
                                              chroma1_w0, chroma1_o0, chroma_log2WD, bit_depth_C);
519
130k
        }
520
154k
      }
521
0
      else {
522
0
        ctx->add_warning(DE265_WARNING_BOTH_PREDFLAGS_ZERO, false);
523
0
        img->integrity = INTEGRITY_DECODING_ERRORS;
524
0
      }
525
154k
    }
526
298k
  }
527
2.46M
  else {
528
2.46M
    assert(shdr->slice_type == SLICE_TYPE_B);
529
530
2.46M
    if (predFlag[0]==1 && predFlag[1]==1) {
531
832k
      if (pps->weighted_bipred_flag==0) {
532
        //const int shift2  = 15-8; // TODO: real bit depth
533
        //const int offset2 = 1<<(shift2-1);
534
535
515k
        int16_t* in0 = predSamplesL[0];
536
515k
        int16_t* in1 = predSamplesL[1];
537
538
515k
        ctx->acceleration.put_weighted_pred_avg(pixels[0], stride[0],
539
515k
                                                in0,in1, nCS, nPbW, nPbH, bit_depth_L);
540
541
515k
        int16_t* in00 = predSamplesC[0][0];
542
515k
        int16_t* in01 = predSamplesC[0][1];
543
515k
        int16_t* in10 = predSamplesC[1][0];
544
515k
        int16_t* in11 = predSamplesC[1][1];
545
546
515k
        if (img->get_chroma_format() != de265_chroma_mono) {
547
472k
          ctx->acceleration.put_weighted_pred_avg(pixels[1], stride[1],
548
472k
                                                  in00, in01, nCS,
549
472k
                                                  nPbW / SubWidthC, nPbH / SubHeightC, bit_depth_C);
550
472k
          ctx->acceleration.put_weighted_pred_avg(pixels[2], stride[2],
551
472k
                                                  in10, in11, nCS,
552
472k
                                                  nPbW / SubWidthC, nPbH / SubHeightC, bit_depth_C);
553
472k
        }
554
515k
      }
555
317k
      else {
556
        // weighted prediction
557
558
317k
        int refIdx0 = vi->refIdx[0];
559
317k
        int refIdx1 = vi->refIdx[1];
560
561
317k
        int luma_log2WD   = shdr->luma_log2_weight_denom + shift1_L;
562
317k
        int chroma_log2WD = shdr->ChromaLog2WeightDenom + shift1_C;
563
564
317k
        int luma_w0 = shdr->LumaWeight[0][refIdx0];
565
317k
        int luma_o0 = shdr->luma_offset[0][refIdx0] * (1<<(offset_shift1_L));
566
317k
        int luma_w1 = shdr->LumaWeight[1][refIdx1];
567
317k
        int luma_o1 = shdr->luma_offset[1][refIdx1] * (1<<(offset_shift1_L));
568
569
317k
        int chroma0_w0 = shdr->ChromaWeight[0][refIdx0][0];
570
317k
        int chroma0_o0 = shdr->ChromaOffset[0][refIdx0][0] * (1<<(offset_shift1_C));
571
317k
        int chroma1_w0 = shdr->ChromaWeight[0][refIdx0][1];
572
317k
        int chroma1_o0 = shdr->ChromaOffset[0][refIdx0][1] * (1<<(offset_shift1_C));
573
317k
        int chroma0_w1 = shdr->ChromaWeight[1][refIdx1][0];
574
317k
        int chroma0_o1 = shdr->ChromaOffset[1][refIdx1][0] * (1<<(offset_shift1_C));
575
317k
        int chroma1_w1 = shdr->ChromaWeight[1][refIdx1][1];
576
317k
        int chroma1_o1 = shdr->ChromaOffset[1][refIdx1][1] * (1<<(offset_shift1_C));
577
578
317k
        logtrace(LogMotion,"weighted-BI-0 [%d] %d %d %d  %dx%d\n", refIdx0, luma_log2WD-6,luma_w0,luma_o0,nPbW,nPbH);
579
317k
        logtrace(LogMotion,"weighted-BI-1 [%d] %d %d %d  %dx%d\n", refIdx1, luma_log2WD-6,luma_w1,luma_o1,nPbW,nPbH);
580
581
317k
        int16_t* in0 = predSamplesL[0];
582
317k
        int16_t* in1 = predSamplesL[1];
583
584
317k
        ctx->acceleration.put_weighted_bipred(pixels[0], stride[0],
585
317k
                                              in0,in1, nCS, nPbW, nPbH,
586
317k
                                              luma_w0,luma_o0,
587
317k
                                              luma_w1,luma_o1,
588
317k
                                              luma_log2WD, bit_depth_L);
589
590
317k
        int16_t* in00 = predSamplesC[0][0];
591
317k
        int16_t* in01 = predSamplesC[0][1];
592
317k
        int16_t* in10 = predSamplesC[1][0];
593
317k
        int16_t* in11 = predSamplesC[1][1];
594
595
317k
        if (img->get_chroma_format() != de265_chroma_mono) {
596
294k
          ctx->acceleration.put_weighted_bipred(pixels[1], stride[1],
597
294k
                                                in00, in01, nCS, nPbW / SubWidthC, nPbH / SubHeightC,
598
294k
                                                chroma0_w0, chroma0_o0,
599
294k
                                                chroma0_w1, chroma0_o1,
600
294k
                                                chroma_log2WD, bit_depth_C);
601
294k
          ctx->acceleration.put_weighted_bipred(pixels[2], stride[2],
602
294k
                                                in10, in11, nCS, nPbW / SubWidthC, nPbH / SubHeightC,
603
294k
                                                chroma1_w0, chroma1_o0,
604
294k
                                                chroma1_w1, chroma1_o1,
605
294k
                                                chroma_log2WD, bit_depth_C);
606
294k
        }
607
317k
      }
608
832k
    }
609
1.62M
    else if (predFlag[0]==1 || predFlag[1]==1) {
610
1.62M
      int l = predFlag[0] ? 0 : 1;
611
612
1.62M
      if (pps->weighted_bipred_flag==0) {
613
987k
        ctx->acceleration.put_unweighted_pred(pixels[0], stride[0],
614
987k
                                              predSamplesL[l],nCS, nPbW,nPbH, bit_depth_L);
615
616
987k
        if (img->get_chroma_format() != de265_chroma_mono) {
617
922k
          ctx->acceleration.put_unweighted_pred(pixels[1], stride[1],
618
922k
                                                predSamplesC[0][l], nCS,
619
922k
                                                nPbW / SubWidthC, nPbH / SubHeightC, bit_depth_C);
620
922k
          ctx->acceleration.put_unweighted_pred(pixels[2], stride[2],
621
922k
                                                predSamplesC[1][l], nCS,
622
922k
                                                nPbW / SubWidthC, nPbH / SubHeightC, bit_depth_C);
623
922k
        }
624
987k
      }
625
642k
      else {
626
642k
        int refIdx = vi->refIdx[l];
627
628
642k
        int luma_log2WD   = shdr->luma_log2_weight_denom + shift1_L;
629
642k
        int chroma_log2WD = shdr->ChromaLog2WeightDenom  + shift1_C;
630
631
642k
        int luma_w = shdr->LumaWeight[l][refIdx];
632
642k
        int luma_o = shdr->luma_offset[l][refIdx] * (1<<(offset_shift1_L));
633
634
642k
        int chroma0_w = shdr->ChromaWeight[l][refIdx][0];
635
642k
        int chroma0_o = shdr->ChromaOffset[l][refIdx][0] * (1<<(offset_shift1_C));
636
642k
        int chroma1_w = shdr->ChromaWeight[l][refIdx][1];
637
642k
        int chroma1_o = shdr->ChromaOffset[l][refIdx][1] * (1<<(offset_shift1_C));
638
639
642k
        logtrace(LogMotion,"weighted-B-L%d [%d] %d %d %d  %dx%d\n", l, refIdx, luma_log2WD-6,luma_w,luma_o,nPbW,nPbH);
640
641
642k
        ctx->acceleration.put_weighted_pred(pixels[0], stride[0],
642
642k
                                            predSamplesL[l],nCS, nPbW,nPbH,
643
642k
                                            luma_w, luma_o, luma_log2WD, bit_depth_L);
644
645
642k
        if (img->get_chroma_format() != de265_chroma_mono) {
646
605k
          ctx->acceleration.put_weighted_pred(pixels[1], stride[1],
647
605k
                                              predSamplesC[0][l], nCS,
648
605k
                                              nPbW / SubWidthC, nPbH / SubHeightC,
649
605k
                                              chroma0_w, chroma0_o, chroma_log2WD, bit_depth_C);
650
605k
          ctx->acceleration.put_weighted_pred(pixels[2], stride[2],
651
605k
                                              predSamplesC[1][l], nCS,
652
605k
                                              nPbW / SubWidthC, nPbH / SubHeightC,
653
605k
                                              chroma1_w, chroma1_o, chroma_log2WD, bit_depth_C);
654
605k
        }
655
642k
      }
656
1.62M
    }
657
0
    else {
658
      // TODO: check why it can actually happen that both predFlags[] are false.
659
      // For now, we ignore this and continue decoding.
660
661
0
      ctx->add_warning(DE265_WARNING_BOTH_PREDFLAGS_ZERO, false);
662
0
      img->integrity = INTEGRITY_DECODING_ERRORS;
663
0
    }
664
2.46M
  }
665
666
#if defined(DE265_LOG_TRACE) && 0
667
  logtrace(LogTransform,"MC pixels (luma), position %d %d:\n", xP,yP);
668
669
  for (int y=0;y<nPbH;y++) {
670
    logtrace(LogTransform,"MC-y-%d-%d ",xP,yP+y);
671
672
    for (int x=0;x<nPbW;x++) {
673
      logtrace(LogTransform,"*%02x ", pixels[0][x+y*stride[0]]);
674
    }
675
676
    logtrace(LogTransform,"*\n");
677
  }
678
679
680
  logtrace(LogTransform,"MC pixels (chroma cb), position %d %d:\n", xP/2,yP/2);
681
682
  for (int y=0;y<nPbH/2;y++) {
683
    logtrace(LogTransform,"MC-cb-%d-%d ",xP/2,yP/2+y);
684
685
    for (int x=0;x<nPbW/2;x++) {
686
      logtrace(LogTransform,"*%02x ", pixels[1][x+y*stride[1]]);
687
    }
688
689
    logtrace(LogTransform,"*\n");
690
  }
691
692
693
  logtrace(LogTransform,"MC pixels (chroma cr), position %d %d:\n", xP/2,yP/2);
694
695
  for (int y=0;y<nPbH/2;y++) {
696
    logtrace(LogTransform,"MC-cr-%d-%d ",xP/2,yP/2+y);
697
698
    for (int x=0;x<nPbW/2;x++) {
699
      logtrace(LogTransform,"*%02x ", pixels[2][x+y*stride[2]]);
700
    }
701
702
    logtrace(LogTransform,"*\n");
703
  }
704
#endif
705
2.76M
}
706
707
708
#ifdef DE265_LOG_TRACE
// Trace-log the motion data of one candidate: one line per reference list.
// For a list that is not used, MV and reference index are printed as dashes.
void logmvcand(const PBMotion& p)
{
  for (int list=0; list<2; list++) {
    if (!p.predFlag[list]) {
      logtrace(LogMotion,"  %d: %s  --;-- ref=--\n", list, "no ");
      continue;
    }

    logtrace(LogMotion,"  %d: %s  %d;%d ref=%d\n", list, "yes",
             p.mv[list].x, p.mv[list].y, p.refIdx[list]);
  }
}
#else
// tracing disabled: logmvcand() expands to nothing
#define logmvcand(p)
#endif
723
724
725
bool PBMotion::operator==(const PBMotion& b) const
726
505k
{
727
505k
  const PBMotion& a = *this;
728
729
  // TODO: is this really correct? no check for predFlag? Standard says so... (p.127)
730
731
1.18M
  for (int i=0;i<2;i++) {
732
871k
    if (a.predFlag[i] != b.predFlag[i]) return false;
733
734
788k
    if (a.predFlag[i]) {
735
615k
      if (a.mv[i].x != b.mv[i].x) return false;
736
546k
      if (a.mv[i].y != b.mv[i].y) return false;
737
524k
      if (a.refIdx[i] != b.refIdx[i]) return false;
738
524k
    }
739
788k
  }
740
741
312k
  return true;
742
505k
}
743
744
745
class MotionVectorAccess_de265_image : public MotionVectorAccess
746
{
747
public:
748
1.99M
  MotionVectorAccess_de265_image(const de265_image* i) : img(i) { }
749
750
1.99M
  enum PartMode get_PartMode(int x,int y) const override { return img->get_PartMode(x,y); }
751
1.58M
  const PBMotion& get_mv_info(int x,int y) const override { return img->get_mv_info(x,y); }
752
753
private:
754
  const de265_image* img;
755
};
756
757
758
759
/*
760
  +--+                +--+--+
761
  |B2|                |B1|B0|
762
  +--+----------------+--+--+
763
     |                   |
764
     |                   |
765
     |                   |
766
     |                   |
767
     |        PB         |
768
     |                   |
769
     |                   |
770
  +--+                   |
771
  |A1|                   |
772
  +--+-------------------+
773
  |A0|
774
  +--+
775
*/
776
777
778
// 8.5.3.1.2
779
// TODO: check: can we fill the candidate list directly in this function and omit to copy later
780
/*
781
  xC/yC:  CB position
782
  nCS:    CB size                 (probably modified because of singleMCLFlag)
783
  xP/yP:  PB position (absolute)  (probably modified because of singleMCLFlag)
784
  singleMCLFlag
785
  nPbW/nPbH: PB size
786
  partIdx
787
  out_cand: merging candidate vectors
788
789
  Add these candidates:
790
  - A1
791
  - B1  (if != A1)
792
  - B0  (if != B1)
793
  - A0  (if != A1)
794
  - B2  (if != A1 and != B1)
795
796
  A maximum of 4 candidates are generated.
797
798
  Note 1: For a CB split into two PBs, it does not make sense to merge the
799
  second part to the parameters of the first part, since then, we could use 2Nx2N
800
  right away. -> Exclude this candidate.
801
*/
802
int derive_spatial_merging_candidates(//const de265_image* img,
803
                                      const MotionVectorAccess& mvaccess,
804
                                      const de265_image* img,
805
                                      int xC, int yC, int nCS, int xP, int yP,
806
                                      uint8_t singleMCLFlag,
807
                                      int nPbW, int nPbH,
808
                                      int partIdx,
809
                                      PBMotion* out_cand,
810
                                      int maxCandidates)
811
1.99M
{
812
1.99M
  const pic_parameter_set* pps = &img->get_pps();
813
1.99M
  const int log2_parallel_merge_level = pps->log2_parallel_merge_level;
814
815
1.99M
  enum PartMode PartMode = mvaccess.get_PartMode(xC,yC);
816
817
  /*
818
  const int A0 = SpatialMergingCandidates::PRED_A0;
819
  const int A1 = SpatialMergingCandidates::PRED_A1;
820
  const int B0 = SpatialMergingCandidates::PRED_B0;
821
  const int B1 = SpatialMergingCandidates::PRED_B1;
822
  const int B2 = SpatialMergingCandidates::PRED_B2;
823
  */
824
825
  // --- A1 ---
826
827
  // a pixel within A1 (bottom right of A1)
828
1.99M
  int xA1 = xP-1;
829
1.99M
  int yA1 = yP+nPbH-1;
830
831
1.99M
  bool availableA1;
832
1.99M
  int idxA1;
833
834
1.99M
  int computed_candidates = 0;
835
836
  // check if candidate is in same motion-estimation region (MER) -> discard
837
1.99M
  if ((xP>>log2_parallel_merge_level) == (xA1>>log2_parallel_merge_level) &&
838
1.99M
      (yP>>log2_parallel_merge_level) == (yA1>>log2_parallel_merge_level)) {
839
113k
    availableA1 = false;
840
113k
    logtrace(LogMotion,"spatial merging candidate A1: below parallel merge level\n");
841
113k
  }
842
  // redundant candidate? (Note 1) -> discard
843
1.88M
  else if (// !singleMCLFlag &&    automatically true when partIdx==1
844
1.88M
           partIdx==1 &&
845
1.88M
           (PartMode==PART_Nx2N ||
846
192k
            PartMode==PART_nLx2N ||
847
192k
            PartMode==PART_nRx2N)) {
848
80.4k
    availableA1 = false;
849
80.4k
    logtrace(LogMotion,"spatial merging candidate A1: second part ignore\n");
850
80.4k
  }
851
  // MV available in A1
852
1.80M
  else {
853
1.80M
    availableA1 = img->available_pred_blk(xC,yC, nCS, xP,yP, nPbW,nPbH,partIdx, xA1,yA1);
854
1.80M
    if (!availableA1) logtrace(LogMotion,"spatial merging candidate A1: unavailable\n");
855
1.80M
  }
856
857
1.99M
  if (availableA1) {
858
1.58M
    idxA1 = computed_candidates++;
859
1.58M
    out_cand[idxA1] = mvaccess.get_mv_info(xA1,yA1);
860
861
1.58M
    logtrace(LogMotion,"spatial merging candidate A1:\n");
862
1.58M
    logmvcand(out_cand[idxA1]);
863
1.58M
  }
864
865
1.99M
  if (computed_candidates>=maxCandidates) return computed_candidates;
866
867
868
  // --- B1 ---
869
870
651k
  int xB1 = xP+nPbW-1;
871
651k
  int yB1 = yP-1;
872
873
651k
  bool availableB1;
874
651k
  int idxB1;
875
876
  // same MER -> discard
877
651k
  if ((xP>>log2_parallel_merge_level) == (xB1>>log2_parallel_merge_level) &&
878
651k
      (yP>>log2_parallel_merge_level) == (yB1>>log2_parallel_merge_level)) {
879
80.4k
    availableB1 = false;
880
80.4k
    logtrace(LogMotion,"spatial merging candidate B1: below parallel merge level\n");
881
80.4k
  }
882
  // redundant candidate (Note 1) -> discard
883
570k
  else if (// !singleMCLFlag &&    automatically true when partIdx==1
884
570k
           partIdx==1 &&
885
570k
           (PartMode==PART_2NxN ||
886
124k
            PartMode==PART_2NxnU ||
887
124k
            PartMode==PART_2NxnD)) {
888
40.7k
    availableB1 = false;
889
40.7k
    logtrace(LogMotion,"spatial merging candidate B1: second part ignore\n");
890
40.7k
  }
891
  // MV available in B1
892
529k
  else {
893
529k
    availableB1 = img->available_pred_blk(xC,yC, nCS, xP,yP, nPbW,nPbH,partIdx, xB1,yB1);
894
529k
    if (!availableB1) logtrace(LogMotion,"spatial merging candidate B1: unavailable\n");
895
529k
  }
896
897
651k
  if (availableB1) {
898
478k
    const PBMotion& b1 = img->get_mv_info(xB1,yB1);
899
900
    // B1 == A1 -> discard B1
901
478k
    if (availableA1 && out_cand[idxA1] == b1) {
902
118k
      idxB1 = idxA1;
903
118k
      logtrace(LogMotion,"spatial merging candidate B1: redundant to A1\n");
904
118k
    }
905
360k
    else {
906
360k
      idxB1 = computed_candidates++;
907
360k
      out_cand[idxB1] = b1;
908
909
360k
      logtrace(LogMotion,"spatial merging candidate B1:\n");
910
360k
      logmvcand(out_cand[idxB1]);
911
360k
    }
912
478k
  }
913
914
651k
  if (computed_candidates>=maxCandidates) return computed_candidates;
915
916
917
  // --- B0 ---
918
919
338k
  int xB0 = xP+nPbW;
920
338k
  int yB0 = yP-1;
921
922
338k
  bool availableB0;
923
338k
  int idxB0;
924
925
338k
  if ((xP>>log2_parallel_merge_level) == (xB0>>log2_parallel_merge_level) &&
926
338k
      (yP>>log2_parallel_merge_level) == (yB0>>log2_parallel_merge_level)) {
927
50.2k
    availableB0 = false;
928
50.2k
    logtrace(LogMotion,"spatial merging candidate B0: below parallel merge level\n");
929
50.2k
  }
930
288k
  else {
931
288k
    availableB0 = img->available_pred_blk(xC,yC, nCS, xP,yP, nPbW,nPbH,partIdx, xB0,yB0);
932
288k
    if (!availableB0) logtrace(LogMotion,"spatial merging candidate B0: unavailable\n");
933
288k
  }
934
935
338k
  if (availableB0) {
936
116k
    const PBMotion& b0 = img->get_mv_info(xB0,yB0);
937
938
    // B0 == B1 -> discard B0
939
116k
    if (availableB1 && out_cand[idxB1]==b0) {
940
68.7k
      idxB0 = idxB1;
941
68.7k
      logtrace(LogMotion,"spatial merging candidate B0: redundant to B1\n");
942
68.7k
    }
943
48.2k
    else {
944
48.2k
      idxB0 = computed_candidates++;
945
48.2k
      out_cand[idxB0] = b0;
946
48.2k
      logtrace(LogMotion,"spatial merging candidate B0:\n");
947
48.2k
      logmvcand(out_cand[idxB0]);
948
48.2k
    }
949
116k
  }
950
951
338k
  if (computed_candidates>=maxCandidates) return computed_candidates;
952
953
954
  // --- A0 ---
955
956
298k
  int xA0 = xP-1;
957
298k
  int yA0 = yP+nPbH;
958
959
298k
  bool availableA0;
960
298k
  int idxA0;
961
962
298k
  if ((xP>>log2_parallel_merge_level) == (xA0>>log2_parallel_merge_level) &&
963
298k
      (yP>>log2_parallel_merge_level) == (yA0>>log2_parallel_merge_level)) {
964
67.6k
    availableA0 = false;
965
67.6k
    logtrace(LogMotion,"spatial merging candidate A0: below parallel merge level\n");
966
67.6k
  }
967
230k
  else {
968
230k
    availableA0 = img->available_pred_blk(xC,yC, nCS, xP,yP, nPbW,nPbH,partIdx, xA0,yA0);
969
230k
    if (!availableA0) logtrace(LogMotion,"spatial merging candidate A0: unavailable\n");
970
230k
  }
971
972
298k
  if (availableA0) {
973
43.1k
    const PBMotion& a0 = img->get_mv_info(xA0,yA0);
974
975
    // A0 == A1 -> discard A0
976
43.1k
    if (availableA1 && out_cand[idxA1]==a0) {
977
30.7k
      idxA0 = idxA1;
978
30.7k
      logtrace(LogMotion,"spatial merging candidate A0: redundant to A1\n");
979
30.7k
    }
980
12.3k
    else {
981
12.3k
      idxA0 = computed_candidates++;
982
12.3k
      out_cand[idxA0] = a0;
983
12.3k
      logtrace(LogMotion,"spatial merging candidate A0:\n");
984
12.3k
      logmvcand(out_cand[idxA0]);
985
12.3k
    }
986
43.1k
  }
987
988
298k
  if (computed_candidates>=maxCandidates) return computed_candidates;
989
990
991
  // --- B2 ---
992
993
288k
  int xB2 = xP-1;
994
288k
  int yB2 = yP-1;
995
996
288k
  bool availableB2;
997
288k
  int idxB2;
998
999
  // if we already have four candidates, do not consider B2 anymore
1000
288k
  if (computed_candidates==4) {
1001
240
    availableB2 = false;
1002
240
    logtrace(LogMotion,"spatial merging candidate B2: ignore\n");
1003
240
  }
1004
288k
  else if ((xP>>log2_parallel_merge_level) == (xB2>>log2_parallel_merge_level) &&
1005
288k
           (yP>>log2_parallel_merge_level) == (yB2>>log2_parallel_merge_level)) {
1006
77.5k
    availableB2 = false;
1007
77.5k
    logtrace(LogMotion,"spatial merging candidate B2: below parallel merge level\n");
1008
77.5k
  }
1009
211k
  else {
1010
211k
    availableB2 = img->available_pred_blk(xC,yC, nCS, xP,yP, nPbW,nPbH,partIdx, xB2,yB2);
1011
211k
    if (!availableB2) logtrace(LogMotion,"spatial merging candidate B2: unavailable\n");
1012
211k
  }
1013
1014
288k
  if (availableB2) {
1015
125k
    const PBMotion& b2 = img->get_mv_info(xB2,yB2);
1016
1017
    // B2 == B1 -> discard B2
1018
125k
    if (availableB1 && out_cand[idxB1]==b2) {
1019
82.8k
      idxB2 = idxB1;
1020
82.8k
      logtrace(LogMotion,"spatial merging candidate B2: redundant to B1\n");
1021
82.8k
    }
1022
    // B2 == A1 -> discard B2
1023
42.8k
    else if (availableA1 && out_cand[idxA1]==b2) {
1024
11.3k
      idxB2 = idxA1;
1025
11.3k
      logtrace(LogMotion,"spatial merging candidate B2: redundant to A1\n");
1026
11.3k
    }
1027
31.4k
    else {
1028
31.4k
      idxB2 = computed_candidates++;
1029
31.4k
      out_cand[idxB2] = b2;
1030
31.4k
      logtrace(LogMotion,"spatial merging candidate B2:\n");
1031
31.4k
      logmvcand(out_cand[idxB2]);
1032
31.4k
    }
1033
125k
  }
1034
1035
288k
  return computed_candidates;
1036
298k
}
1037
1038
1039
// 8.5.3.1.4
1040
void derive_zero_motion_vector_candidates(const slice_segment_header* shdr,
1041
                                          PBMotion* out_mergeCandList,
1042
                                          int* inout_numCurrMergeCand,
1043
                                          int maxCandidates)
1044
1.99M
{
1045
1.99M
  logtrace(LogMotion,"derive_zero_motion_vector_candidates\n");
1046
1047
1.99M
  int numRefIdx;
1048
1049
1.99M
  if (shdr->slice_type==SLICE_TYPE_P) {
1050
272k
    numRefIdx = shdr->num_ref_idx_l0_active;
1051
272k
  }
1052
1.72M
  else {
1053
1.72M
    numRefIdx = libde265_min(shdr->num_ref_idx_l0_active,
1054
1.72M
                             shdr->num_ref_idx_l1_active);
1055
1.72M
  }
1056
1057
1058
  //int numInputMergeCand = *inout_numMergeCand;
1059
1.99M
  int zeroIdx = 0;
1060
1061
2.29M
  while (*inout_numCurrMergeCand < maxCandidates) {
1062
    // 1.
1063
1064
301k
    logtrace(LogMotion,"zeroIdx:%d numRefIdx:%d\n", zeroIdx, numRefIdx);
1065
1066
301k
    PBMotion* newCand = &out_mergeCandList[*inout_numCurrMergeCand];
1067
1068
301k
    const int refIdx = (zeroIdx < numRefIdx) ? zeroIdx : 0;
1069
1070
301k
    if (shdr->slice_type==SLICE_TYPE_P) {
1071
35.9k
      newCand->refIdx[0] = refIdx;
1072
35.9k
      newCand->refIdx[1] = -1;
1073
35.9k
      newCand->predFlag[0] = 1;
1074
35.9k
      newCand->predFlag[1] = 0;
1075
35.9k
    }
1076
265k
    else {
1077
265k
      newCand->refIdx[0] = refIdx;
1078
265k
      newCand->refIdx[1] = refIdx;
1079
265k
      newCand->predFlag[0] = 1;
1080
265k
      newCand->predFlag[1] = 1;
1081
265k
    }
1082
1083
301k
    newCand->mv[0].x = 0;
1084
301k
    newCand->mv[0].y = 0;
1085
301k
    newCand->mv[1].x = 0;
1086
301k
    newCand->mv[1].y = 0;
1087
1088
301k
    (*inout_numCurrMergeCand)++;
1089
1090
    // 2.
1091
1092
301k
    zeroIdx++;
1093
301k
  }
1094
1.99M
}
1095
1096
1097
/* Scales the motion vector 'mv' by the ratio currDist / colDist.

   out_mv   - receives the scaled vector (or an unmodified copy of 'mv', see below)
   mv       - input vector (passed by value, so out_mv may point to the caller's copy)
   colDist  - distance the input vector spans; clipped to [-128,127] before use
   currDist - distance the output vector should span; clipped to [-128,127] before use

   Returns true when the vector was scaled. When the clipped colDist is 0, no scale
   factor can be formed; 'mv' is then copied to *out_mv unchanged and false is returned.
*/
bool scale_mv(MotionVector* out_mv, MotionVector mv, int colDist, int currDist)
{
  const int td = Clip3(-128,127, colDist);
  const int tb = Clip3(-128,127, currDist);

  // guard against division by zero
  if (td==0) {
    *out_mv = mv;
    return false;
  }

  const int tx = (16384 + (abs_value(td)>>1)) / td;
  const int distScaleFactor = Clip3(-4096,4095, (tb*tx+32)>>6);

  // scale each component, round the magnitude and restore the sign
  const int scaledX = distScaleFactor*mv.x;
  const int scaledY = distScaleFactor*mv.y;

  out_mv->x = Clip3(-32768,32767,
                    Sign(scaledX)*((abs_value(scaledX)+127)>>8));
  out_mv->y = Clip3(-32768,32767,
                    Sign(scaledY)*((abs_value(scaledY)+127)>>8));

  return true;
}
1116
1117
1118
// (L1003) 8.5.3.2.8
1119
1120
void derive_collocated_motion_vectors(base_context* ctx,
1121
                                      de265_image* img,
1122
                                      const slice_segment_header* shdr,
1123
                                      int xP,int yP,
1124
                                      int colPic,
1125
                                      int xColPb,int yColPb,
1126
                                      int refIdxLX,  // (always 0 for merge mode)
1127
                                      int X,
1128
                                      MotionVector* out_mvLXCol,
1129
                                      uint8_t* out_availableFlagLXCol)
1130
887k
{
1131
887k
  logtrace(LogMotion,"derive_collocated_motion_vectors %d;%d\n",xP,yP);
1132
1133
1134
  // get collocated image and the prediction mode at the collocated position
1135
1136
887k
  assert(ctx->has_image(colPic));
1137
887k
  const de265_image* colImg = ctx->get_image(colPic);
1138
1139
  // check for access outside image area
1140
1141
887k
  if (xColPb >= colImg->get_width() ||
1142
887k
      yColPb >= colImg->get_height()) {
1143
0
    ctx->add_warning(DE265_WARNING_COLLOCATED_MOTION_VECTOR_OUTSIDE_IMAGE_AREA, false);
1144
0
    *out_availableFlagLXCol = 0;
1145
0
    return;
1146
0
  }
1147
1148
887k
  enum PredMode predMode = colImg->get_pred_mode(xColPb,yColPb);
1149
1150
1151
  // collocated block is Intra -> no collocated MV
1152
1153
887k
  if (predMode == MODE_INTRA) {
1154
834k
    out_mvLXCol->x = 0;
1155
834k
    out_mvLXCol->y = 0;
1156
834k
    *out_availableFlagLXCol = 0;
1157
834k
    return;
1158
834k
  }
1159
1160
1161
53.2k
  logtrace(LogMotion,"colPic:%d (POC=%d) X:%d refIdxLX:%d refpiclist:%d\n",
1162
53.2k
           colPic,
1163
53.2k
           colImg->PicOrderCntVal,
1164
53.2k
           X,refIdxLX,shdr->RefPicList[X][refIdxLX]);
1165
1166
1167
  // collocated reference image is unavailable -> no collocated MV
1168
1169
53.2k
  if (colImg->integrity == INTEGRITY_UNAVAILABLE_REFERENCE) {
1170
0
    out_mvLXCol->x = 0;
1171
0
    out_mvLXCol->y = 0;
1172
0
    *out_availableFlagLXCol = 0;
1173
0
    return;
1174
0
  }
1175
1176
1177
  // get the collocated MV
1178
1179
53.2k
  const PBMotion& mvi = colImg->get_mv_info(xColPb,yColPb);
1180
53.2k
  int listCol;
1181
53.2k
  int refIdxCol;
1182
53.2k
  MotionVector mvCol;
1183
1184
53.2k
  logtrace(LogMotion,"read MVI %d;%d:\n",xColPb,yColPb);
1185
53.2k
  logmvcand(mvi);
1186
1187
1188
  // collocated MV uses only L1 -> use L1
1189
53.2k
  if (mvi.predFlag[0]==0) {
1190
15.1k
    mvCol = mvi.mv[1];
1191
15.1k
    refIdxCol = mvi.refIdx[1];
1192
15.1k
    listCol = 1;
1193
15.1k
  }
1194
  // collocated MV uses only L0 -> use L0
1195
38.0k
  else if (mvi.predFlag[1]==0) {
1196
16.5k
    mvCol = mvi.mv[0];
1197
16.5k
    refIdxCol = mvi.refIdx[0];
1198
16.5k
    listCol = 0;
1199
16.5k
  }
1200
  // collocated MV uses L0 and L1
1201
21.4k
  else {
1202
21.4k
    bool allRefFramesBeforeCurrentFrame = true;
1203
1204
21.4k
    const int currentPOC = img->PicOrderCntVal;
1205
1206
    // all reference POCs earlier than current POC (list 1)
1207
    // Test L1 first, because there is a higher change to find a future reference frame.
1208
1209
50.6k
    for (int rIdx=0; rIdx<shdr->num_ref_idx_l1_active && allRefFramesBeforeCurrentFrame; rIdx++)
1210
29.1k
      {
1211
29.1k
        const de265_image* refimg = ctx->get_image(shdr->RefPicList[1][rIdx]);
1212
29.1k
        int refPOC = refimg->PicOrderCntVal;
1213
1214
29.1k
        if (refPOC > currentPOC) {
1215
5.32k
          allRefFramesBeforeCurrentFrame = false;
1216
5.32k
        }
1217
29.1k
      }
1218
1219
    // all reference POCs earlier than current POC (list 0)
1220
1221
48.3k
    for (int rIdx=0; rIdx<shdr->num_ref_idx_l0_active && allRefFramesBeforeCurrentFrame; rIdx++)
1222
26.8k
      {
1223
26.8k
        const de265_image* refimg = ctx->get_image(shdr->RefPicList[0][rIdx]);
1224
26.8k
        int refPOC = refimg->PicOrderCntVal;
1225
1226
26.8k
        if (refPOC > currentPOC) {
1227
3.22k
          allRefFramesBeforeCurrentFrame = false;
1228
3.22k
        }
1229
26.8k
      }
1230
1231
1232
    /* TODO: What is the rationale behind this ???
1233
1234
       My guess:
1235
       when there are images before the current frame (most probably in L0) and images after
1236
       the current frame (most probably in L1), we take the reference in the opposite
1237
       direction than where the collocated frame is positioned in the hope that the distance
1238
       to the current frame will be smaller and thus give a better prediction.
1239
1240
       If all references point into the past, we cannot say much about the temporal order or
1241
       L0,L1 and thus take over both parts.
1242
     */
1243
1244
21.4k
    if (allRefFramesBeforeCurrentFrame) {
1245
12.9k
      mvCol = mvi.mv[X];
1246
12.9k
      refIdxCol = mvi.refIdx[X];
1247
12.9k
      listCol = X;
1248
12.9k
    }
1249
8.54k
    else {
1250
8.54k
      int N = shdr->collocated_from_l0_flag;
1251
8.54k
      mvCol = mvi.mv[N];
1252
8.54k
      refIdxCol = mvi.refIdx[N];
1253
8.54k
      listCol = N;
1254
8.54k
    }
1255
21.4k
  }
1256
1257
1258
1259
53.2k
  int slice_hdr_idx = colImg->get_SliceHeaderIndex(xColPb,yColPb);
1260
53.2k
  if (slice_hdr_idx >= colImg->slices.size()) {
1261
0
    ctx->add_warning(DE265_WARNING_INVALID_SLICE_HEADER_INDEX_ACCESS, false);
1262
1263
0
    *out_availableFlagLXCol = 0;
1264
0
    out_mvLXCol->x = 0;
1265
0
    out_mvLXCol->y = 0;
1266
0
    return;
1267
0
  }
1268
1269
53.2k
  const slice_segment_header* colShdr = colImg->slices[ colImg->get_SliceHeaderIndex(xColPb,yColPb) ];
1270
1271
53.2k
  if (shdr->LongTermRefPic[X][refIdxLX] !=
1272
53.2k
      colShdr->LongTermRefPic[listCol][refIdxCol]) {
1273
8.85k
    *out_availableFlagLXCol = 0;
1274
8.85k
    out_mvLXCol->x = 0;
1275
8.85k
    out_mvLXCol->y = 0;
1276
8.85k
  }
1277
44.3k
  else {
1278
44.3k
    *out_availableFlagLXCol = 1;
1279
1280
44.3k
    const bool isLongTerm = shdr->LongTermRefPic[X][refIdxLX];
1281
1282
44.3k
    int colDist  = colImg->PicOrderCntVal - colShdr->RefPicList_POC[listCol][refIdxCol];
1283
44.3k
    int currDist = img->PicOrderCntVal - shdr->RefPicList_POC[X][refIdxLX];
1284
1285
44.3k
    logtrace(LogMotion,"COLPOCDIFF %d %d [%d %d / %d %d]\n",colDist, currDist,
1286
44.3k
             colImg->PicOrderCntVal, colShdr->RefPicList_POC[listCol][refIdxCol],
1287
44.3k
             img->PicOrderCntVal, shdr->RefPicList_POC[X][refIdxLX]
1288
44.3k
             );
1289
1290
44.3k
    if (isLongTerm || colDist == currDist) {
1291
37.8k
      *out_mvLXCol = mvCol;
1292
37.8k
    }
1293
6.48k
    else {
1294
6.48k
      if (!scale_mv(out_mvLXCol, mvCol, colDist, currDist)) {
1295
425
        ctx->add_warning(DE265_WARNING_INCORRECT_MOTION_VECTOR_SCALING, false);
1296
425
        img->integrity = INTEGRITY_DECODING_ERRORS;
1297
425
      }
1298
1299
6.48k
      logtrace(LogMotion,"scale: %d;%d to %d;%d\n",
1300
6.48k
               mvCol.x,mvCol.y, out_mvLXCol->x,out_mvLXCol->y);
1301
6.48k
    }
1302
44.3k
  }
1303
53.2k
}
1304
1305
1306
// 8.5.3.1.7
1307
void derive_temporal_luma_vector_prediction(base_context* ctx,
1308
                                            de265_image* img,
1309
                                            const slice_segment_header* shdr,
1310
                                            int xP,int yP,
1311
                                            int nPbW,int nPbH,
1312
                                            int refIdxL,
1313
                                            int X, // which MV (L0/L1) to get
1314
                                            MotionVector* out_mvLXCol,
1315
                                            uint8_t*      out_availableFlagLXCol)
1316
1.17M
{
1317
  // --- no temporal MVP -> exit ---
1318
1319
1.17M
  if (shdr->slice_temporal_mvp_enabled_flag == 0) {
1320
537k
    out_mvLXCol->x = 0;
1321
537k
    out_mvLXCol->y = 0;
1322
537k
    *out_availableFlagLXCol = 0;
1323
537k
    return;
1324
537k
  }
1325
1326
1327
  // --- find collocated reference image ---
1328
1329
632k
  int Log2CtbSizeY = img->get_sps().Log2CtbSizeY;
1330
1331
632k
  int colPic; // TODO: this is the same for the whole slice. We can precompute it.
1332
1333
632k
  if (shdr->slice_type == SLICE_TYPE_B &&
1334
632k
      shdr->collocated_from_l0_flag == 0)
1335
120k
    {
1336
120k
      logtrace(LogMotion,"collocated L1 ref_idx=%d\n",shdr->collocated_ref_idx);
1337
1338
120k
      colPic = shdr->RefPicList[1][ shdr->collocated_ref_idx ];
1339
120k
    }
1340
512k
  else
1341
512k
    {
1342
512k
      logtrace(LogMotion,"collocated L0 ref_idx=%d\n",shdr->collocated_ref_idx);
1343
1344
512k
      colPic = shdr->RefPicList[0][ shdr->collocated_ref_idx ];
1345
512k
    }
1346
1347
1348
  // check whether collocated reference picture exists
1349
1350
632k
  if (!ctx->has_image(colPic)) {
1351
0
    out_mvLXCol->x = 0;
1352
0
    out_mvLXCol->y = 0;
1353
0
    *out_availableFlagLXCol = 0;
1354
1355
0
    ctx->add_warning(DE265_WARNING_NONEXISTING_REFERENCE_PICTURE_ACCESSED, false);
1356
0
    return;
1357
0
  }
1358
1359
1360
  // --- get collocated MV either at bottom-right corner or from center of PB ---
1361
1362
632k
  int xColPb,yColPb;
1363
632k
  int yColBr = yP + nPbH; // bottom right collocated motion vector position
1364
632k
  int xColBr = xP + nPbW;
1365
1366
  /* If neighboring pixel at bottom-right corner is in the same CTB-row and inside the image,
1367
     use this (reduced down to 16 pixels resolution) as collocated MV position.
1368
1369
     Note: see 2014, Sze, Sect. 5.2.1.2 why candidate C0 is excluded when on another CTB-row.
1370
     This is to reduce the memory bandwidth requirements.
1371
   */
1372
632k
  if ((yP>>Log2CtbSizeY) == (yColBr>>Log2CtbSizeY) &&
1373
632k
      xColBr < img->get_sps().pic_width_in_luma_samples &&
1374
632k
      yColBr < img->get_sps().pic_height_in_luma_samples)
1375
260k
    {
1376
260k
      xColPb = xColBr & ~0x0F; // reduce resolution of collocated motion-vectors to 16 pixels grid
1377
260k
      yColPb = yColBr & ~0x0F;
1378
1379
260k
      derive_collocated_motion_vectors(ctx,img,shdr, xP,yP, colPic, xColPb,yColPb, refIdxL, X,
1380
260k
                                       out_mvLXCol, out_availableFlagLXCol);
1381
260k
    }
1382
371k
  else
1383
371k
    {
1384
371k
      out_mvLXCol->x = 0;
1385
371k
      out_mvLXCol->y = 0;
1386
371k
      *out_availableFlagLXCol = 0;
1387
371k
    }
1388
1389
1390
632k
  if (*out_availableFlagLXCol==0) {
1391
1392
626k
    int xColCtr = xP+(nPbW>>1);
1393
626k
    int yColCtr = yP+(nPbH>>1);
1394
1395
626k
    xColPb = xColCtr & ~0x0F; // reduce resolution of collocated motion-vectors to 16 pixels grid
1396
626k
    yColPb = yColCtr & ~0x0F;
1397
1398
626k
    derive_collocated_motion_vectors(ctx,img,shdr, xP,yP, colPic, xColPb,yColPb, refIdxL, X,
1399
626k
                                     out_mvLXCol, out_availableFlagLXCol);
1400
626k
  }
1401
632k
}
1402
1403
1404
static int table_8_19[2][12] = {
1405
  { 0,1,0,2,1,2,0,3,1,3,2,3 },
1406
  { 1,0,2,0,2,1,3,0,3,1,3,2 }
1407
  };
1408
1409
// 8.5.3.1.3
1410
/* Note (TODO): during decoding, we know which of the candidates we will select.
1411
+   Hence, we do not really have to generate the other ones...
1412
+ */
1413
void derive_combined_bipredictive_merging_candidates(const base_context* ctx,
1414
                                                     const slice_segment_header* shdr,
1415
                                                     PBMotion* inout_mergeCandList,
1416
                                                     int* inout_numMergeCand,
1417
                                                     int maxCandidates)
1418
1.72M
{
1419
1.72M
  if (*inout_numMergeCand>1 && *inout_numMergeCand < maxCandidates) {
1420
19.3k
    int numOrigMergeCand = *inout_numMergeCand;
1421
1422
19.3k
    int numInputMergeCand = *inout_numMergeCand;
1423
19.3k
    int combIdx = 0;
1424
19.3k
    uint8_t combStop = false;
1425
1426
63.5k
    while (!combStop) {
1427
44.2k
      int l0CandIdx = table_8_19[0][combIdx];
1428
44.2k
      int l1CandIdx = table_8_19[1][combIdx];
1429
1430
44.2k
      if (l0CandIdx >= numInputMergeCand ||
1431
44.2k
          l1CandIdx >= numInputMergeCand) {
1432
0
        assert(false); // bitstream error -> TODO: conceal error
1433
0
      }
1434
1435
44.2k
      PBMotion& l0Cand = inout_mergeCandList[l0CandIdx];
1436
44.2k
      PBMotion& l1Cand = inout_mergeCandList[l1CandIdx];
1437
1438
44.2k
      logtrace(LogMotion,"add bipredictive merging candidate (combIdx:%d)\n",combIdx);
1439
44.2k
      logtrace(LogMotion,"l0Cand:\n"); logmvcand(l0Cand);
1440
44.2k
      logtrace(LogMotion,"l1Cand:\n"); logmvcand(l1Cand);
1441
1442
44.2k
      const de265_image* img0 = l0Cand.predFlag[0] ? ctx->get_image(shdr->RefPicList[0][l0Cand.refIdx[0]]) : NULL;
1443
44.2k
      const de265_image* img1 = l1Cand.predFlag[1] ? ctx->get_image(shdr->RefPicList[1][l1Cand.refIdx[1]]) : NULL;
1444
1445
44.2k
      if (l0Cand.predFlag[0] && !img0) {
1446
0
        return; // TODO error
1447
0
      }
1448
1449
44.2k
      if (l1Cand.predFlag[1] && !img1) {
1450
0
        return; // TODO error
1451
0
      }
1452
1453
44.2k
      if (l0Cand.predFlag[0] && l1Cand.predFlag[1] &&
1454
44.2k
          (img0->PicOrderCntVal != img1->PicOrderCntVal     ||
1455
23.5k
           l0Cand.mv[0].x != l1Cand.mv[1].x ||
1456
23.5k
           l0Cand.mv[0].y != l1Cand.mv[1].y)) {
1457
12.2k
        PBMotion& p = inout_mergeCandList[ *inout_numMergeCand ];
1458
12.2k
        p.refIdx[0] = l0Cand.refIdx[0];
1459
12.2k
        p.refIdx[1] = l1Cand.refIdx[1];
1460
12.2k
        p.predFlag[0] = l0Cand.predFlag[0];
1461
12.2k
        p.predFlag[1] = l1Cand.predFlag[1];
1462
12.2k
        p.mv[0] = l0Cand.mv[0];
1463
12.2k
        p.mv[1] = l1Cand.mv[1];
1464
12.2k
        (*inout_numMergeCand)++;
1465
1466
12.2k
        logtrace(LogMotion,"result:\n");
1467
12.2k
        logmvcand(p);
1468
12.2k
      }
1469
1470
44.2k
      combIdx++;
1471
44.2k
      if (combIdx == numOrigMergeCand*(numOrigMergeCand-1) ||
1472
44.2k
          *inout_numMergeCand == maxCandidates) {
1473
19.3k
        combStop = true;
1474
19.3k
      }
1475
44.2k
    }
1476
19.3k
  }
1477
1.72M
}
1478
1479
1480
// 8.5.3.1.1
1481
1482
void get_merge_candidate_list_without_step_9(base_context* ctx,
1483
                                             const slice_segment_header* shdr,
1484
                                             const MotionVectorAccess& mvaccess,
1485
                                             de265_image* img,
1486
                                             int xC,int yC, int xP,int yP,
1487
                                             int nCS, int nPbW,int nPbH, int partIdx,
1488
                                             int max_merge_idx,
1489
                                             PBMotion* mergeCandList)
1490
1.99M
{
1491
1492
  //int xOrigP = xP;
1493
  //int yOrigP = yP;
1494
1.99M
  int nOrigPbW = nPbW;
1495
1.99M
  int nOrigPbH = nPbH;
1496
1497
1.99M
  int singleMCLFlag; // single merge-candidate-list (MCL) flag
1498
1499
  /* Use single MCL for CBs of size 8x8, except when parallel-merge-level is at 4x4.
1500
     Without this flag, PBs smaller than 8x8 would not receive as much merging candidates.
1501
     Having additional candidates might have these advantages:
1502
     - coding MVs for these small PBs is expensive, and
1503
     - since the PBs are not far away from a proper (neighboring) merging candidate,
1504
     the quality of the candidates will still be good.
1505
  */
1506
1.99M
  singleMCLFlag = (img->get_pps().log2_parallel_merge_level > 2 && nCS==8);
1507
1508
1.99M
  if (singleMCLFlag) {
1509
773k
    xP=xC;
1510
773k
    yP=yC;
1511
773k
    nPbW=nCS;
1512
773k
    nPbH=nCS;
1513
773k
    partIdx=0;
1514
773k
  }
1515
1516
1.99M
  int maxCandidates = max_merge_idx+1;
1517
  //MotionVectorSpec mergeCandList[5];
1518
1.99M
  int numMergeCand=0;
1519
1520
  // --- spatial merge candidates
1521
1522
1.99M
  numMergeCand = derive_spatial_merging_candidates(mvaccess,
1523
1.99M
                                                   img, xC,yC, nCS, xP,yP, singleMCLFlag,
1524
1.99M
                                                   nPbW,nPbH,partIdx, mergeCandList,
1525
1.99M
                                                   maxCandidates);
1526
1527
  // --- collocated merge candidate
1528
1.99M
  if (numMergeCand < maxCandidates) {
1529
263k
    int refIdxCol[2] = { 0,0 };
1530
1531
263k
    MotionVector mvCol[2];
1532
263k
    uint8_t predFlagLCol[2];
1533
263k
    derive_temporal_luma_vector_prediction(ctx,img,shdr, xP,yP,nPbW,nPbH,
1534
263k
                                           refIdxCol[0],0, &mvCol[0],
1535
263k
                                           &predFlagLCol[0]);
1536
1537
263k
    uint8_t availableFlagCol = predFlagLCol[0];
1538
263k
    predFlagLCol[1] = 0;
1539
1540
263k
    if (shdr->slice_type == SLICE_TYPE_B) {
1541
233k
      derive_temporal_luma_vector_prediction(ctx,img,shdr,
1542
233k
                                             xP,yP,nPbW,nPbH, refIdxCol[1],1, &mvCol[1],
1543
233k
                                             &predFlagLCol[1]);
1544
233k
      availableFlagCol |= predFlagLCol[1];
1545
233k
    }
1546
1547
1548
263k
    if (availableFlagCol) {
1549
7.56k
      PBMotion* colVec = &mergeCandList[numMergeCand++];
1550
1551
7.56k
      colVec->mv[0] = mvCol[0];
1552
7.56k
      colVec->mv[1] = mvCol[1];
1553
7.56k
      colVec->predFlag[0] = predFlagLCol[0];
1554
7.56k
      colVec->predFlag[1] = predFlagLCol[1];
1555
7.56k
      colVec->refIdx[0] = refIdxCol[0];
1556
7.56k
      colVec->refIdx[1] = refIdxCol[1];
1557
7.56k
    }
1558
263k
  }
1559
1560
1561
  // --- bipredictive merge candidates ---
1562
1563
1.99M
  if (shdr->slice_type == SLICE_TYPE_B) {
1564
1.72M
    derive_combined_bipredictive_merging_candidates(ctx, shdr,
1565
1.72M
                                                    mergeCandList, &numMergeCand, maxCandidates);
1566
1.72M
  }
1567
1568
1569
  // --- zero-vector merge candidates ---
1570
1571
1.99M
  derive_zero_motion_vector_candidates(shdr, mergeCandList, &numMergeCand, maxCandidates);
1572
1573
1574
1.99M
  logtrace(LogMotion,"mergeCandList:\n");
1575
9.37M
  for (int i=0;i<shdr->MaxNumMergeCand;i++)
1576
7.38M
    {
1577
      //logtrace(LogMotion, " %d:%s\n", i, i==merge_idx ? " SELECTED":"");
1578
7.38M
      logmvcand(mergeCandList[i]);
1579
7.38M
    }
1580
1.99M
}
1581
1582
1583
1584
void get_merge_candidate_list(base_context* ctx,
1585
                              const slice_segment_header* shdr,
1586
                              de265_image* img,
1587
                              int xC,int yC, int xP,int yP,
1588
                              int nCS, int nPbW,int nPbH, int partIdx,
1589
                              PBMotion* mergeCandList)
1590
0
{
1591
0
  int max_merge_idx = 5-shdr->five_minus_max_num_merge_cand -1;
1592
1593
0
  get_merge_candidate_list_without_step_9(ctx, shdr,
1594
0
                                          MotionVectorAccess_de265_image(img), img,
1595
0
                                          xC,yC,xP,yP,nCS,nPbW,nPbH, partIdx,
1596
0
                                          max_merge_idx, mergeCandList);
1597
1598
  // 9. for encoder: modify all merge candidates
1599
1600
0
  for (int i=0;i<=max_merge_idx;i++) {
1601
0
    if (mergeCandList[i].predFlag[0] &&
1602
0
        mergeCandList[i].predFlag[1] &&
1603
0
        nPbW+nPbH==12)
1604
0
      {
1605
0
        mergeCandList[i].refIdx[1]   = -1;
1606
0
        mergeCandList[i].predFlag[1] = 0;
1607
0
      }
1608
0
  }
1609
0
}
1610
1611
1612
void derive_luma_motion_merge_mode(base_context* ctx,
1613
                                   const slice_segment_header* shdr,
1614
                                   de265_image* img,
1615
                                   int xC,int yC, int xP,int yP,
1616
                                   int nCS, int nPbW,int nPbH, int partIdx,
1617
                                   int merge_idx,
1618
                                   PBMotion* out_vi)
1619
1.99M
{
1620
1.99M
  PBMotion mergeCandList[5];
1621
1622
1.99M
  get_merge_candidate_list_without_step_9(ctx, shdr,
1623
1.99M
                                          MotionVectorAccess_de265_image(img), img,
1624
1.99M
                                          xC,yC,xP,yP,nCS,nPbW,nPbH, partIdx,
1625
1.99M
                                          merge_idx, mergeCandList);
1626
1627
1628
1.99M
  *out_vi = mergeCandList[merge_idx];
1629
1630
  // 8.5.3.1.1 / 9.
1631
1632
1.99M
  if (out_vi->predFlag[0] && out_vi->predFlag[1] && nPbW+nPbH==12) {
1633
146k
    out_vi->refIdx[1] = -1;
1634
146k
    out_vi->predFlag[1] = 0;
1635
146k
  }
1636
1.99M
}
1637
1638
1639
// 8.5.3.1.6
1640
void derive_spatial_luma_vector_prediction(base_context* ctx,
1641
                                           de265_image* img,
1642
                                           const slice_segment_header* shdr,
1643
                                           int xC,int yC,int nCS,int xP,int yP,
1644
                                           int nPbW,int nPbH, int X,
1645
                                           int refIdxLX, int partIdx,
1646
                                           uint8_t out_availableFlagLXN[2],
1647
                                           MotionVector out_mvLXN[2])
1648
978k
{
1649
978k
  if (refIdxLX >= MAX_NUM_REF_PICS) {
1650
0
    ctx->add_warning(DE265_WARNING_INCORRECT_MOTION_VECTOR_SCALING, false);
1651
0
    img->integrity = INTEGRITY_DECODING_ERRORS;
1652
1653
0
    out_availableFlagLXN[0] = false;
1654
0
    out_availableFlagLXN[1] = false;
1655
0
    out_mvLXN[0] = MotionVector();
1656
0
    out_mvLXN[1] = MotionVector();
1657
0
    return;
1658
0
  }
1659
1660
978k
  int isScaledFlagLX = 0;
1661
1662
978k
  const int A=0;
1663
978k
  const int B=1;
1664
1665
978k
  out_availableFlagLXN[A] = 0;
1666
978k
  out_availableFlagLXN[B] = 0;
1667
1668
1669
  // --- A ---
1670
1671
  // 1.
1672
1673
978k
  int xA[2], yA[2];
1674
978k
  xA[0] = xP-1;
1675
978k
  yA[0] = yP + nPbH;
1676
978k
  xA[1] = xA[0];
1677
978k
  yA[1] = yA[0]-1;
1678
1679
  // 2.
1680
1681
978k
  out_availableFlagLXN[A] = 0;
1682
978k
  out_mvLXN[A].x = 0;
1683
978k
  out_mvLXN[A].y = 0;
1684
1685
  // 3. / 4.
1686
1687
978k
  bool availableA[2];
1688
978k
  availableA[0] = img->available_pred_blk(xC,yC, nCS, xP,yP, nPbW,nPbH,partIdx, xA[0],yA[0]);
1689
978k
  availableA[1] = img->available_pred_blk(xC,yC, nCS, xP,yP, nPbW,nPbH,partIdx, xA[1],yA[1]);
1690
1691
  // 5.
1692
1693
978k
  if (availableA[0] || availableA[1]) {
1694
784k
    isScaledFlagLX = 1;
1695
784k
  }
1696
1697
  // 6.  test A0 and A1  (Ak)
1698
1699
978k
  int refIdxA=-1;
1700
1701
  // the POC we want to reference in this PB
1702
978k
  const de265_image* tmpimg = ctx->get_image(shdr->RefPicList[X][ refIdxLX ]);
1703
978k
  if (tmpimg==NULL) { return; }
1704
978k
  const int referenced_POC = tmpimg->PicOrderCntVal;
1705
1706
2.93M
  for (int k=0;k<=1;k++) {
1707
1708
1.95M
    if (availableA[k] &&
1709
1.95M
        out_availableFlagLXN[A]==0 && // no A?-predictor so far
1710
1.95M
        img->get_pred_mode(xA[k],yA[k]) != MODE_INTRA) {
1711
1712
791k
      int Y=1-X;
1713
1714
791k
      const PBMotion& vi = img->get_mv_info(xA[k],yA[k]);
1715
791k
      logtrace(LogMotion,"MVP A%d=\n",k);
1716
791k
      logmvcand(vi);
1717
1718
791k
      const de265_image* imgX = NULL;
1719
791k
      if (vi.predFlag[X]) {
1720
        // check for input data validity
1721
763k
        if (vi.refIdx[X]<0 || vi.refIdx[X] >= MAX_NUM_REF_PICS) {
1722
0
          return;
1723
0
        }
1724
1725
763k
        imgX = ctx->get_image(shdr->RefPicList[X][ vi.refIdx[X] ]);
1726
763k
      }
1727
1728
791k
      const de265_image* imgY = NULL;
1729
791k
      if (vi.predFlag[Y]) {
1730
        // check for input data validity
1731
391k
        if (vi.refIdx[Y]<0 || vi.refIdx[Y] >= MAX_NUM_REF_PICS) {
1732
0
          return;
1733
0
        }
1734
1735
391k
        imgY = ctx->get_image(shdr->RefPicList[Y][ vi.refIdx[Y] ]);
1736
391k
      }
1737
1738
      // check whether the predictor X is available and references the same POC
1739
791k
      if (vi.predFlag[X] && imgX && imgX->PicOrderCntVal == referenced_POC) {
1740
1741
747k
        logtrace(LogMotion,"take A%d/L%d as A candidate with same POC\n",k,X);
1742
1743
747k
        out_availableFlagLXN[A]=1;
1744
747k
        out_mvLXN[A] = vi.mv[X];
1745
747k
        refIdxA = vi.refIdx[X];
1746
747k
      }
1747
      // check whether the other predictor (Y) is available and references the same POC
1748
43.7k
      else if (vi.predFlag[Y] && imgY && imgY->PicOrderCntVal == referenced_POC) {
1749
1750
19.5k
        logtrace(LogMotion,"take A%d/L%d as A candidate with same POC\n",k,Y);
1751
1752
19.5k
        out_availableFlagLXN[A]=1;
1753
19.5k
        out_mvLXN[A] = vi.mv[Y];
1754
19.5k
        refIdxA = vi.refIdx[Y];
1755
19.5k
      }
1756
791k
    }
1757
1.95M
  }
1758
1759
  // 7. If there is no predictor referencing the same POC, we take any other reference as
1760
  //    long as it is the same type of reference (long-term / short-term)
1761
1762
1.39M
  for (int k=0 ; k<=1 && out_availableFlagLXN[A]==0 ; k++) {
1763
419k
    int refPicList=-1;
1764
1765
419k
    if (availableA[k] &&
1766
        // TODO: we could remove this call by storing the result of the similar computation above
1767
419k
        img->get_pred_mode(xA[k],yA[k]) != MODE_INTRA) {
1768
1769
18.5k
      int Y=1-X;
1770
1771
18.5k
      const PBMotion& vi = img->get_mv_info(xA[k],yA[k]);
1772
18.5k
      if (vi.predFlag[X]==1 &&
1773
18.5k
          shdr->LongTermRefPic[X][refIdxLX] == shdr->LongTermRefPic[X][ vi.refIdx[X] ]) {
1774
1775
6.33k
        logtrace(LogMotion,"take A%D/L%d as A candidate with different POCs\n",k,X);
1776
1777
6.33k
        out_availableFlagLXN[A]=1;
1778
6.33k
        out_mvLXN[A] = vi.mv[X];
1779
6.33k
        refIdxA = vi.refIdx[X];
1780
6.33k
        refPicList = X;
1781
6.33k
      }
1782
12.1k
      else if (vi.predFlag[Y]==1 &&
1783
12.1k
               shdr->LongTermRefPic[X][refIdxLX] == shdr->LongTermRefPic[Y][ vi.refIdx[Y] ]) {
1784
1785
6.94k
        logtrace(LogMotion,"take A%d/L%d as A candidate with different POCs\n",k,Y);
1786
1787
6.94k
        out_availableFlagLXN[A]=1;
1788
6.94k
        out_mvLXN[A] = vi.mv[Y];
1789
6.94k
        refIdxA = vi.refIdx[Y];
1790
6.94k
        refPicList = Y;
1791
6.94k
      }
1792
18.5k
    }
1793
1794
419k
    if (out_availableFlagLXN[A]==1) {
1795
13.2k
      if (refIdxA<0) {
1796
0
        out_availableFlagLXN[0] = out_availableFlagLXN[1] = false;
1797
0
        return; // error
1798
0
      }
1799
1800
13.2k
      assert(refIdxA>=0);
1801
13.2k
      assert(refPicList>=0);
1802
1803
13.2k
      const de265_image* refPicA = ctx->get_image(shdr->RefPicList[refPicList][refIdxA ]);
1804
13.2k
      const de265_image* refPicX = ctx->get_image(shdr->RefPicList[X         ][refIdxLX]);
1805
1806
      //int picStateA = shdr->RefPicList_PicState[refPicList][refIdxA ];
1807
      //int picStateX = shdr->RefPicList_PicState[X         ][refIdxLX];
1808
1809
13.2k
      int isLongTermA = shdr->LongTermRefPic[refPicList][refIdxA ];
1810
13.2k
      int isLongTermX = shdr->LongTermRefPic[X         ][refIdxLX];
1811
1812
13.2k
      logtrace(LogMotion,"scale MVP A: A-POC:%d X-POC:%d\n",
1813
13.2k
               refPicA->PicOrderCntVal,refPicX->PicOrderCntVal);
1814
1815
13.2k
      if (!isLongTermA && !isLongTermX)
1816
      /*
1817
      if (picStateA == UsedForShortTermReference &&
1818
          picStateX == UsedForShortTermReference)
1819
      */
1820
11.6k
        {
1821
11.6k
          int distA = img->PicOrderCntVal - refPicA->PicOrderCntVal;
1822
11.6k
          int distX = img->PicOrderCntVal - referenced_POC;
1823
1824
11.6k
          if (!scale_mv(&out_mvLXN[A], out_mvLXN[A], distA, distX)) {
1825
1.90k
            ctx->add_warning(DE265_WARNING_INCORRECT_MOTION_VECTOR_SCALING, false);
1826
1.90k
            img->integrity = INTEGRITY_DECODING_ERRORS;
1827
1.90k
          }
1828
11.6k
        }
1829
13.2k
    }
1830
419k
  }
1831
1832
  // --- B ---
1833
1834
  // 1.
1835
1836
978k
  int xB[3], yB[3];
1837
978k
  xB[0] = xP+nPbW;
1838
978k
  yB[0] = yP-1;
1839
978k
  xB[1] = xB[0]-1;
1840
978k
  yB[1] = yP-1;
1841
978k
  xB[2] = xP-1;
1842
978k
  yB[2] = yP-1;
1843
1844
  // 2.
1845
1846
978k
  out_availableFlagLXN[B] = 0;
1847
978k
  out_mvLXN[B].x = 0;
1848
978k
  out_mvLXN[B].y = 0;
1849
1850
  // 3. test B0,B1,B2 (Bk)
1851
1852
978k
  int refIdxB=-1;
1853
1854
978k
  bool availableB[3];
1855
3.91M
  for (int k=0;k<3;k++) {
1856
2.93M
    availableB[k] = img->available_pred_blk(xC,yC, nCS, xP,yP, nPbW,nPbH,partIdx, xB[k],yB[k]);
1857
1858
2.93M
    if (availableB[k] && out_availableFlagLXN[B]==0) {
1859
1860
866k
      int Y=1-X;
1861
1862
866k
      const PBMotion& vi = img->get_mv_info(xB[k],yB[k]);
1863
866k
      logtrace(LogMotion,"MVP B%d=\n",k);
1864
866k
      logmvcand(vi);
1865
1866
866k
      const de265_image* imgX = NULL;
1867
866k
      if (vi.predFlag[X]) {
1868
826k
        if (vi.refIdx[X] < 0 || vi.refIdx[X] >= MAX_NUM_REF_PICS) {
1869
0
          return;
1870
0
        }
1871
1872
826k
        imgX = ctx->get_image(shdr->RefPicList[X][ vi.refIdx[X] ]);
1873
826k
      }
1874
1875
866k
      const de265_image* imgY = NULL;
1876
866k
      if (vi.predFlag[Y]) {
1877
388k
        if (vi.refIdx[Y] < 0 || vi.refIdx[Y] >= MAX_NUM_REF_PICS) {
1878
0
          return;
1879
0
        }
1880
1881
388k
        imgY = ctx->get_image(shdr->RefPicList[Y][ vi.refIdx[Y] ]);
1882
388k
      }
1883
1884
866k
      if (vi.predFlag[X] && imgX && imgX->PicOrderCntVal == referenced_POC) {
1885
795k
        logtrace(LogMotion,"a) take B%d/L%d as B candidate with same POC\n",k,X);
1886
1887
795k
        out_availableFlagLXN[B]=1;
1888
795k
        out_mvLXN[B] = vi.mv[X];
1889
795k
        refIdxB = vi.refIdx[X];
1890
795k
      }
1891
70.8k
      else if (vi.predFlag[Y] && imgY && imgY->PicOrderCntVal == referenced_POC) {
1892
25.6k
        logtrace(LogMotion,"b) take B%d/L%d as B candidate with same POC\n",k,Y);
1893
1894
25.6k
        out_availableFlagLXN[B]=1;
1895
25.6k
        out_mvLXN[B] = vi.mv[Y];
1896
25.6k
        refIdxB = vi.refIdx[Y];
1897
25.6k
      }
1898
866k
    }
1899
2.93M
  }
1900
1901
  // 4.
1902
1903
978k
  if (isScaledFlagLX==0 &&      // no A predictor,
1904
978k
      out_availableFlagLXN[B])  // but an unscaled B predictor
1905
188k
    {
1906
      // use unscaled B predictor as A predictor
1907
1908
188k
      logtrace(LogMotion,"copy the same-POC B candidate as additional A candidate\n");
1909
1910
188k
      out_availableFlagLXN[A]=1;
1911
188k
      out_mvLXN[A] = out_mvLXN[B];
1912
188k
      refIdxA = refIdxB;
1913
188k
    }
1914
1915
  // 5.
1916
1917
  // If no A predictor, we output the unscaled B as the A predictor (above)
1918
  // and also add a scaled B predictor here.
1919
  // If there is (probably) an A predictor, no differing-POC B predictor is generated.
1920
978k
  if (isScaledFlagLX==0) {
1921
194k
    out_availableFlagLXN[B]=0;
1922
1923
527k
    for (int k=0 ; k<=2 && out_availableFlagLXN[B]==0 ; k++) {
1924
333k
      int refPicList=-1;
1925
1926
333k
      if (availableB[k]) {
1927
191k
        int Y=1-X;
1928
1929
191k
        const PBMotion& vi = img->get_mv_info(xB[k],yB[k]);
1930
1931
191k
        if (vi.refIdx[X] >= MAX_NUM_REF_PICS) {
1932
0
          img->integrity = INTEGRITY_DECODING_ERRORS;
1933
0
          ctx->add_warning(DE265_WARNING_NONEXISTING_REFERENCE_PICTURE_ACCESSED, false);
1934
0
          return; // error   // TODO: we actually should make sure that this is never set to an out-of-range value
1935
0
        }
1936
1937
191k
        if (vi.predFlag[X]==1 &&
1938
191k
            shdr->LongTermRefPic[X][refIdxLX] == shdr->LongTermRefPic[X][ vi.refIdx[X] ]) {
1939
181k
          out_availableFlagLXN[B]=1;
1940
181k
          out_mvLXN[B] = vi.mv[X];
1941
181k
          refIdxB = vi.refIdx[X];
1942
181k
          refPicList = X;
1943
181k
        }
1944
9.66k
        else if (vi.predFlag[Y]==1 &&
1945
9.66k
                 shdr->LongTermRefPic[X][refIdxLX] == shdr->LongTermRefPic[Y][ vi.refIdx[Y] ]) {
1946
8.47k
          out_availableFlagLXN[B]=1;
1947
8.47k
          out_mvLXN[B] = vi.mv[Y];
1948
8.47k
          refIdxB = vi.refIdx[Y];
1949
8.47k
          refPicList = Y;
1950
8.47k
        }
1951
191k
      }
1952
1953
333k
      if (out_availableFlagLXN[B]==1) {
1954
190k
        if (refIdxB<0) {
1955
0
          out_availableFlagLXN[0] = out_availableFlagLXN[1] = false;
1956
0
          return; // error
1957
0
        }
1958
1959
190k
        assert(refPicList>=0);
1960
190k
        assert(refIdxB>=0);
1961
1962
190k
        const de265_image* refPicB=ctx->get_image(shdr->RefPicList[refPicList][refIdxB ]);
1963
190k
        const de265_image* refPicX=ctx->get_image(shdr->RefPicList[X         ][refIdxLX]);
1964
1965
190k
        int isLongTermB = shdr->LongTermRefPic[refPicList][refIdxB ];
1966
190k
        int isLongTermX = shdr->LongTermRefPic[X         ][refIdxLX];
1967
1968
190k
        if (refPicB==NULL || refPicX==NULL) {
1969
0
          img->decctx->add_warning(DE265_WARNING_NONEXISTING_REFERENCE_PICTURE_ACCESSED,false);
1970
0
          img->integrity = INTEGRITY_DECODING_ERRORS;
1971
0
        }
1972
190k
        else if (refPicB->PicOrderCntVal != refPicX->PicOrderCntVal &&
1973
190k
                 !isLongTermB && !isLongTermX) {
1974
1.90k
          int distB = img->PicOrderCntVal - refPicB->PicOrderCntVal;
1975
1.90k
          int distX = img->PicOrderCntVal - referenced_POC;
1976
1977
1.90k
          logtrace(LogMotion,"scale MVP B: B-POC:%d X-POC:%d\n",refPicB->PicOrderCntVal,refPicX->PicOrderCntVal);
1978
1979
1.90k
          if (!scale_mv(&out_mvLXN[B], out_mvLXN[B], distB, distX)) {
1980
315
            ctx->add_warning(DE265_WARNING_INCORRECT_MOTION_VECTOR_SCALING, false);
1981
315
            img->integrity = INTEGRITY_DECODING_ERRORS;
1982
315
          }
1983
1.90k
        }
1984
190k
      }
1985
333k
    }
1986
194k
  }
1987
978k
}
1988
1989
1990
// 8.5.3.1.5
1991
void fill_luma_motion_vector_predictors(base_context* ctx,
1992
                                        const slice_segment_header* shdr,
1993
                                        de265_image* img,
1994
                                        int xC,int yC,int nCS,int xP,int yP,
1995
                                        int nPbW,int nPbH, int l,
1996
                                        int refIdx, int partIdx,
1997
                                        MotionVector out_mvpList[2])
1998
978k
{
1999
  // 8.5.3.1.6: derive two spatial vector predictors A (0) and B (1)
2000
2001
978k
  uint8_t availableFlagLXN[2];
2002
978k
  MotionVector mvLXN[2];
2003
2004
978k
  derive_spatial_luma_vector_prediction(ctx, img, shdr, xC,yC, nCS, xP,yP,
2005
978k
                                        nPbW,nPbH, l, refIdx, partIdx,
2006
978k
                                        availableFlagLXN, mvLXN);
2007
2008
  // 8.5.3.1.7: if we only have one spatial vector or both spatial vectors are the same,
2009
  // derive a temporal predictor
2010
2011
978k
  uint8_t availableFlagLXCol;
2012
978k
  MotionVector mvLXCol;
2013
2014
2015
978k
  if (availableFlagLXN[0] &&
2016
978k
      availableFlagLXN[1] &&
2017
978k
      (mvLXN[0].x != mvLXN[1].x || mvLXN[0].y != mvLXN[1].y)) {
2018
305k
    availableFlagLXCol = 0;
2019
305k
  }
2020
672k
  else {
2021
672k
    derive_temporal_luma_vector_prediction(ctx, img, shdr,
2022
672k
                                           xP,yP, nPbW,nPbH, refIdx,l,
2023
672k
                                           &mvLXCol, &availableFlagLXCol);
2024
672k
  }
2025
2026
2027
  // --- build candidate vector list with exactly two entries ---
2028
2029
978k
  int numMVPCandLX=0;
2030
2031
  // spatial predictor A
2032
2033
978k
  if (availableFlagLXN[0])
2034
968k
    {
2035
968k
      out_mvpList[numMVPCandLX++] = mvLXN[0];
2036
968k
    }
2037
2038
  // spatial predictor B (if not same as A)
2039
2040
978k
  if (availableFlagLXN[1] &&
2041
978k
      (!availableFlagLXN[0] || // in case A in not available, but mvLXA initialized to same as mvLXB
2042
823k
       (mvLXN[0].x != mvLXN[1].x || mvLXN[0].y != mvLXN[1].y)))
2043
309k
    {
2044
309k
      out_mvpList[numMVPCandLX++] = mvLXN[1];
2045
309k
    }
2046
2047
  // temporal predictor
2048
2049
978k
  if (availableFlagLXCol)
2050
29.7k
    {
2051
29.7k
      out_mvpList[numMVPCandLX++] = mvLXCol;
2052
29.7k
    }
2053
2054
  // fill with zero predictors
2055
2056
1.62M
  while (numMVPCandLX<2) {
2057
649k
    out_mvpList[numMVPCandLX].x = 0;
2058
649k
    out_mvpList[numMVPCandLX].y = 0;
2059
649k
    numMVPCandLX++;
2060
649k
  }
2061
2062
2063
978k
  assert(numMVPCandLX==2);
2064
978k
}
2065
2066
2067
/* Return the luma motion vector predictor for reference list 'l'.

   The two-entry candidate list is built by fill_luma_motion_vector_predictors()
   and the entry is picked by the mvp flag of the requested list
   (motion.mvp_l0_flag for l==0, motion.mvp_l1_flag otherwise).
 */
MotionVector luma_motion_vector_prediction(base_context* ctx,
                                           const slice_segment_header* shdr,
                                           de265_image* img,
                                           const PBMotionCoding& motion,
                                           int xC,int yC,int nCS,int xP,int yP,
                                           int nPbW,int nPbH, int l,
                                           int refIdx, int partIdx)
{
  MotionVector candidates[2];

  fill_luma_motion_vector_predictors(ctx, shdr, img,
                                     xC,yC,nCS,xP,yP,
                                     nPbW, nPbH, l, refIdx, partIdx,
                                     candidates);

  // select predictor according to mvp_lX_flag

  if (l) {
    return candidates[ motion.mvp_l1_flag ];
  }

  return candidates[ motion.mvp_l0_flag ];
}
2086
2087
2088
#if DE265_LOG_TRACE
/* Trace-log the motion data of one prediction block.

   For each of the two reference lists, the prediction flag is logged,
   followed by the motion vector and reference index. Vector and index
   are logged as 0 when the list's prediction flag is not set.
 */
void logMV(int x0,int y0,int nPbW,int nPbH, const char* mode,const PBMotion* mv)
{
  const int usedL0 = mv->predFlag[0];
  const int usedL1 = mv->predFlag[1];

  int mvx0=0, mvy0=0, ref0=0;
  if (usedL0) {
    mvx0 = mv->mv[0].x;
    mvy0 = mv->mv[0].y;
    ref0 = mv->refIdx[0];
  }

  int mvx1=0, mvy1=0, ref1=0;
  if (usedL1) {
    mvx1 = mv->mv[1].x;
    mvy1 = mv->mv[1].y;
    ref1 = mv->refIdx[1];
  }

  logtrace(LogMotion,
           "*MV %d;%d [%d;%d] %s: (%d) %d;%d @%d   (%d) %d;%d @%d\n", x0,y0,nPbW,nPbH,mode,
           usedL0, mvx0,mvy0, ref0,
           usedL1, mvx1,mvy1, ref1);
}
#else
// tracing disabled: logMV() calls compile to nothing
#define logMV(x0,y0,nPbW,nPbH,mode,mv)
#endif
2104
2105
2106
2107
// 8.5.3.1
2108
void motion_vectors_and_ref_indices(base_context* ctx,
2109
                                    const slice_segment_header* shdr,
2110
                                    de265_image* img,
2111
                                    const PBMotionCoding& motion,
2112
                                    int xC,int yC, int xB,int yB, int nCS, int nPbW,int nPbH,
2113
                                    int partIdx,
2114
                                    PBMotion* out_vi)
2115
2.76M
{
2116
  //slice_segment_header* shdr = tctx->shdr;
2117
2118
2.76M
  int xP = xC+xB;
2119
2.76M
  int yP = yC+yB;
2120
2121
2.76M
  enum PredMode predMode = img->get_pred_mode(xC,yC);
2122
2123
2.76M
  if (predMode == MODE_SKIP ||
2124
2.76M
      (predMode == MODE_INTER && motion.merge_flag))
2125
1.99M
    {
2126
1.99M
      derive_luma_motion_merge_mode(ctx,shdr,img,
2127
1.99M
                                    xC,yC, xP,yP, nCS,nPbW,nPbH, partIdx,
2128
1.99M
                                    motion.merge_idx, out_vi);
2129
2130
1.99M
      logMV(xP,yP,nPbW,nPbH, "merge_mode", out_vi);
2131
1.99M
    }
2132
765k
  else {
2133
765k
    int mvdL[2][2];
2134
765k
    MotionVector mvpL[2];
2135
2136
2.29M
    for (int l=0;l<2;l++) {
2137
      // 1.
2138
2139
1.53M
      enum InterPredIdc inter_pred_idc = (enum InterPredIdc)motion.inter_pred_idc;
2140
2141
1.53M
      if (inter_pred_idc == PRED_BI ||
2142
1.53M
          (inter_pred_idc == PRED_L0 && l==0) ||
2143
1.53M
          (inter_pred_idc == PRED_L1 && l==1)) {
2144
978k
        out_vi->refIdx[l] = motion.refIdx[l];
2145
978k
        out_vi->predFlag[l] = 1;
2146
2147
978k
        if (motion.refIdx[l] >= MAX_NUM_REF_PICS) {
2148
0
          out_vi->refIdx[l] = 0;
2149
2150
0
          img->integrity = INTEGRITY_DECODING_ERRORS;
2151
0
          ctx->add_warning(DE265_WARNING_NONEXISTING_REFERENCE_PICTURE_ACCESSED, false);
2152
0
          return;
2153
0
        }
2154
978k
      }
2155
551k
      else {
2156
551k
        out_vi->refIdx[l] = -1;
2157
551k
        out_vi->predFlag[l] = 0;
2158
551k
      }
2159
2160
      // 2.
2161
2162
1.53M
      mvdL[l][0] = motion.mvd[l][0];
2163
1.53M
      mvdL[l][1] = motion.mvd[l][1];
2164
2165
2166
1.53M
      if (out_vi->predFlag[l]) {
2167
        // 3.
2168
2169
978k
        mvpL[l] = luma_motion_vector_prediction(ctx,shdr,img,motion,
2170
978k
                                                xC,yC,nCS,xP,yP, nPbW,nPbH, l,
2171
978k
                                                out_vi->refIdx[l], partIdx);
2172
2173
        // 4.
2174
2175
978k
        int32_t x = (mvpL[l].x + mvdL[l][0] + 0x10000) & 0xFFFF;
2176
978k
        int32_t y = (mvpL[l].y + mvdL[l][1] + 0x10000) & 0xFFFF;
2177
2178
978k
        out_vi->mv[l].x = (x>=0x8000) ? x-0x10000 : x;
2179
978k
        out_vi->mv[l].y = (y>=0x8000) ? y-0x10000 : y;
2180
978k
      }
2181
1.53M
    }
2182
2183
765k
    logMV(xP,yP,nPbW,nPbH, "mvp", out_vi);
2184
765k
  }
2185
2.76M
}
2186
2187
2188
// 8.5.3
2189
2190
/* xC/yC : CB position
2191
   xB/yB : position offset of the PB
2192
   nPbW/nPbH : size of PB
2193
   nCS   : CB size
2194
 */
2195
void decode_prediction_unit(base_context* ctx,
2196
                            const slice_segment_header* shdr,
2197
                            de265_image* img,
2198
                            const PBMotionCoding& motion,
2199
                            int xC,int yC, int xB,int yB, int nCS, int nPbW,int nPbH, int partIdx)
2200
2.76M
{
2201
2.76M
  logtrace(LogMotion,"decode_prediction_unit POC=%d %d;%d %dx%d\n",
2202
2.76M
           img->PicOrderCntVal, xC+xB,yC+yB, nPbW,nPbH);
2203
2204
  //slice_segment_header* shdr = tctx->shdr;
2205
2206
  // 1.
2207
2208
2.76M
  PBMotion vi;
2209
2.76M
  motion_vectors_and_ref_indices(ctx, shdr, img, motion,
2210
2.76M
                                 xC,yC, xB,yB, nCS, nPbW,nPbH, partIdx, &vi);
2211
2212
  // 2.
2213
2214
2.76M
  generate_inter_prediction_samples(ctx,shdr, img, xC,yC, xB,yB, nCS, nPbW,nPbH, &vi);
2215
2216
2217
2.76M
  img->set_mv_info(xC+xB,yC+yB,nPbW,nPbH, vi);
2218
2.76M
}