/src/libde265/libde265/motion.cc
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * H.265 video codec. |
3 | | * Copyright (c) 2013-2014 struktur AG, Dirk Farin <farin@struktur.de> |
4 | | * |
5 | | * This file is part of libde265. |
6 | | * |
7 | | * libde265 is free software: you can redistribute it and/or modify |
8 | | * it under the terms of the GNU Lesser General Public License as |
9 | | * published by the Free Software Foundation, either version 3 of |
10 | | * the License, or (at your option) any later version. |
11 | | * |
12 | | * libde265 is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | | * GNU Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public License |
18 | | * along with libde265. If not, see <http://www.gnu.org/licenses/>. |
19 | | */ |
20 | | |
21 | | #include "motion.h" |
22 | | #include "decctx.h" |
23 | | #include "util.h" |
24 | | #include "dpb.h" |
25 | | |
26 | | #include <assert.h> |
27 | | |
28 | | |
29 | | #include <sys/types.h> |
30 | | #include <signal.h> |
31 | | #include <string.h> |
32 | | |
33 | | #if defined(_MSC_VER) || defined(__MINGW32__) |
34 | | # include <malloc.h> |
35 | | #elif defined(HAVE_ALLOCA_H) |
36 | | # include <alloca.h> |
37 | | #endif |
38 | | |
39 | | |
// Largest block edge length (in samples) the motion-compensation scratch
// buffers in this file are sized for.
#define MAX_CU_SIZE 64


// Number of additional reference samples that have to be available in front
// of (left/top) and behind (right/bottom) a luma block, indexed by the
// quarter-sample fraction of the motion vector (mv & 3). Fraction 0 is a
// full-sample position and needs no extra samples.
// NOTE(review): the values presumably mirror the support of the luma
// interpolation filter for each fractional position - confirm against the spec.
// The tables are read-only, hence const.
static const int extra_before[4] = { 0,3,3,2 };
static const int extra_after [4] = { 0,3,4,4 };
45 | | |
46 | | |
47 | | |
48 | | template <class pixel_t> |
49 | | void mc_luma(const base_context* ctx, |
50 | | const seq_parameter_set* sps, int mv_x, int mv_y, |
51 | | int xP,int yP, |
52 | | int16_t* out, int out_stride, |
53 | | const pixel_t* ref, int ref_stride, |
54 | | int nPbW, int nPbH, int bitDepth_L) |
55 | 3.59M | { |
56 | 3.59M | int xFracL = mv_x & 3; |
57 | 3.59M | int yFracL = mv_y & 3; |
58 | | |
59 | 3.59M | int xIntOffsL = xP + (mv_x>>2); |
60 | 3.59M | int yIntOffsL = yP + (mv_y>>2); |
61 | | |
62 | | // luma sample interpolation process (8.5.3.2.2.1) |
63 | | |
64 | | //const int shift1 = sps->BitDepth_Y-8; |
65 | | //const int shift2 = 6; |
66 | 3.59M | const int shift3 = 14 - sps->BitDepth_Y; |
67 | | |
68 | 3.59M | int w = sps->pic_width_in_luma_samples; |
69 | 3.59M | int h = sps->pic_height_in_luma_samples; |
70 | | |
71 | 3.59M | ALIGNED_16(int16_t) mcbuffer[MAX_CU_SIZE * (MAX_CU_SIZE+7)]; |
72 | | |
73 | 3.59M | if (xFracL==0 && yFracL==0) { |
74 | | |
75 | 2.09M | if (xIntOffsL >= 0 && yIntOffsL >= 0 && |
76 | 2.09M | nPbW+xIntOffsL <= w && nPbH+yIntOffsL <= h) { |
77 | | |
78 | 2.02M | ctx->acceleration.put_hevc_qpel(out, out_stride, |
79 | 2.02M | &ref[yIntOffsL*ref_stride + xIntOffsL], |
80 | 2.02M | ref_stride /* sizeof(pixel_t)*/, |
81 | 2.02M | nPbW,nPbH, mcbuffer, 0,0, bitDepth_L); |
82 | 2.02M | } |
83 | 74.0k | else { |
84 | 691k | for (int y=0;y<nPbH;y++) |
85 | 7.06M | for (int x=0;x<nPbW;x++) { |
86 | | |
87 | 6.45M | int xA = Clip3(0,w-1,x + xIntOffsL); |
88 | 6.45M | int yA = Clip3(0,h-1,y + yIntOffsL); |
89 | | |
90 | 6.45M | out[y*out_stride+x] = ref[ xA + yA*ref_stride ] << shift3; |
91 | 6.45M | } |
92 | 74.0k | } |
93 | | |
94 | | #ifdef DE265_LOG_TRACE |
95 | | logtrace(LogMotion,"---MC luma %d %d = direct---\n",xFracL,yFracL); |
96 | | |
97 | | for (int y=0;y<nPbH;y++) { |
98 | | for (int x=0;x<nPbW;x++) { |
99 | | |
100 | | int xA = Clip3(0,w-1,x + xIntOffsL); |
101 | | int yA = Clip3(0,h-1,y + yIntOffsL); |
102 | | |
103 | | logtrace(LogMotion,"%02x ", ref[ xA + yA*ref_stride ]); |
104 | | } |
105 | | logtrace(LogMotion,"\n"); |
106 | | } |
107 | | |
108 | | logtrace(LogMotion," -> \n"); |
109 | | |
110 | | for (int y=0;y<nPbH;y++) { |
111 | | for (int x=0;x<nPbW;x++) { |
112 | | |
113 | | logtrace(LogMotion,"%02x ",out[y*out_stride+x] >> 6); // 6 will be used when summing predictions |
114 | | } |
115 | | logtrace(LogMotion,"\n"); |
116 | | } |
117 | | #endif |
118 | 2.09M | } |
119 | 1.49M | else { |
120 | 1.49M | int extra_left = extra_before[xFracL]; |
121 | 1.49M | int extra_right = extra_after [xFracL]; |
122 | 1.49M | int extra_top = extra_before[yFracL]; |
123 | 1.49M | int extra_bottom = extra_after [yFracL]; |
124 | | |
125 | | //int nPbW_extra = extra_left + nPbW + extra_right; |
126 | | //int nPbH_extra = extra_top + nPbH + extra_bottom; |
127 | | |
128 | | |
129 | 1.49M | pixel_t padbuf[(MAX_CU_SIZE+16)*(MAX_CU_SIZE+7)]; |
130 | | |
131 | 1.49M | const pixel_t* src_ptr; |
132 | 1.49M | int src_stride; |
133 | | |
134 | 1.49M | if (-extra_left + xIntOffsL >= 0 && |
135 | 1.49M | -extra_top + yIntOffsL >= 0 && |
136 | 1.49M | nPbW+extra_right + xIntOffsL < w && |
137 | 1.49M | nPbH+extra_bottom + yIntOffsL < h) { |
138 | 910k | src_ptr = &ref[xIntOffsL + yIntOffsL*ref_stride]; |
139 | 910k | src_stride = ref_stride; |
140 | 910k | } |
141 | 582k | else { |
142 | 9.06M | for (int y=-extra_top;y<nPbH+extra_bottom;y++) { |
143 | 147M | for (int x=-extra_left;x<nPbW+extra_right;x++) { |
144 | | |
145 | 139M | int xA = Clip3(0,w-1,x + xIntOffsL); |
146 | 139M | int yA = Clip3(0,h-1,y + yIntOffsL); |
147 | | |
148 | 139M | padbuf[x+extra_left + (y+extra_top)*(MAX_CU_SIZE+16)] = ref[ xA + yA*ref_stride ]; |
149 | 139M | } |
150 | 8.47M | } |
151 | | |
152 | 582k | src_ptr = &padbuf[extra_top*(MAX_CU_SIZE+16) + extra_left]; |
153 | 582k | src_stride = MAX_CU_SIZE+16; |
154 | 582k | } |
155 | | |
156 | 1.49M | ctx->acceleration.put_hevc_qpel(out, out_stride, |
157 | 1.49M | src_ptr, src_stride /* sizeof(pixel_t) */, |
158 | 1.49M | nPbW,nPbH, mcbuffer, xFracL,yFracL, bitDepth_L); |
159 | | |
160 | | |
161 | 1.49M | logtrace(LogMotion,"---V---\n"); |
162 | 15.4M | for (int y=0;y<nPbH;y++) { |
163 | 187M | for (int x=0;x<nPbW;x++) { |
164 | 174M | logtrace(LogMotion,"%04x ",out[x+y*out_stride]); |
165 | 174M | } |
166 | 13.9M | logtrace(LogMotion,"\n"); |
167 | 13.9M | } |
168 | 1.49M | } |
169 | 3.59M | } void mc_luma<unsigned short>(base_context const*, seq_parameter_set const*, int, int, int, int, short*, int, unsigned short const*, int, int, int, int) Line | Count | Source | 55 | 1.54M | { | 56 | 1.54M | int xFracL = mv_x & 3; | 57 | 1.54M | int yFracL = mv_y & 3; | 58 | | | 59 | 1.54M | int xIntOffsL = xP + (mv_x>>2); | 60 | 1.54M | int yIntOffsL = yP + (mv_y>>2); | 61 | | | 62 | | // luma sample interpolation process (8.5.3.2.2.1) | 63 | | | 64 | | //const int shift1 = sps->BitDepth_Y-8; | 65 | | //const int shift2 = 6; | 66 | 1.54M | const int shift3 = 14 - sps->BitDepth_Y; | 67 | | | 68 | 1.54M | int w = sps->pic_width_in_luma_samples; | 69 | 1.54M | int h = sps->pic_height_in_luma_samples; | 70 | | | 71 | 1.54M | ALIGNED_16(int16_t) mcbuffer[MAX_CU_SIZE * (MAX_CU_SIZE+7)]; | 72 | | | 73 | 1.54M | if (xFracL==0 && yFracL==0) { | 74 | | | 75 | 915k | if (xIntOffsL >= 0 && yIntOffsL >= 0 && | 76 | 915k | nPbW+xIntOffsL <= w && nPbH+yIntOffsL <= h) { | 77 | | | 78 | 868k | ctx->acceleration.put_hevc_qpel(out, out_stride, | 79 | 868k | &ref[yIntOffsL*ref_stride + xIntOffsL], | 80 | 868k | ref_stride /* sizeof(pixel_t)*/, | 81 | 868k | nPbW,nPbH, mcbuffer, 0,0, bitDepth_L); | 82 | 868k | } | 83 | 46.7k | else { | 84 | 433k | for (int y=0;y<nPbH;y++) | 85 | 4.45M | for (int x=0;x<nPbW;x++) { | 86 | | | 87 | 4.07M | int xA = Clip3(0,w-1,x + xIntOffsL); | 88 | 4.07M | int yA = Clip3(0,h-1,y + yIntOffsL); | 89 | | | 90 | 4.07M | out[y*out_stride+x] = ref[ xA + yA*ref_stride ] << shift3; | 91 | 4.07M | } | 92 | 46.7k | } | 93 | | | 94 | | #ifdef DE265_LOG_TRACE | 95 | | logtrace(LogMotion,"---MC luma %d %d = direct---\n",xFracL,yFracL); | 96 | | | 97 | | for (int y=0;y<nPbH;y++) { | 98 | | for (int x=0;x<nPbW;x++) { | 99 | | | 100 | | int xA = Clip3(0,w-1,x + xIntOffsL); | 101 | | int yA = Clip3(0,h-1,y + yIntOffsL); | 102 | | | 103 | | logtrace(LogMotion,"%02x ", ref[ xA + yA*ref_stride ]); | 104 | | } | 105 | | logtrace(LogMotion,"\n"); | 106 | | } | 
107 | | | 108 | | logtrace(LogMotion," -> \n"); | 109 | | | 110 | | for (int y=0;y<nPbH;y++) { | 111 | | for (int x=0;x<nPbW;x++) { | 112 | | | 113 | | logtrace(LogMotion,"%02x ",out[y*out_stride+x] >> 6); // 6 will be used when summing predictions | 114 | | } | 115 | | logtrace(LogMotion,"\n"); | 116 | | } | 117 | | #endif | 118 | 915k | } | 119 | 625k | else { | 120 | 625k | int extra_left = extra_before[xFracL]; | 121 | 625k | int extra_right = extra_after [xFracL]; | 122 | 625k | int extra_top = extra_before[yFracL]; | 123 | 625k | int extra_bottom = extra_after [yFracL]; | 124 | | | 125 | | //int nPbW_extra = extra_left + nPbW + extra_right; | 126 | | //int nPbH_extra = extra_top + nPbH + extra_bottom; | 127 | | | 128 | | | 129 | 625k | pixel_t padbuf[(MAX_CU_SIZE+16)*(MAX_CU_SIZE+7)]; | 130 | | | 131 | 625k | const pixel_t* src_ptr; | 132 | 625k | int src_stride; | 133 | | | 134 | 625k | if (-extra_left + xIntOffsL >= 0 && | 135 | 625k | -extra_top + yIntOffsL >= 0 && | 136 | 625k | nPbW+extra_right + xIntOffsL < w && | 137 | 625k | nPbH+extra_bottom + yIntOffsL < h) { | 138 | 281k | src_ptr = &ref[xIntOffsL + yIntOffsL*ref_stride]; | 139 | 281k | src_stride = ref_stride; | 140 | 281k | } | 141 | 343k | else { | 142 | 5.14M | for (int y=-extra_top;y<nPbH+extra_bottom;y++) { | 143 | 81.1M | for (int x=-extra_left;x<nPbW+extra_right;x++) { | 144 | | | 145 | 76.3M | int xA = Clip3(0,w-1,x + xIntOffsL); | 146 | 76.3M | int yA = Clip3(0,h-1,y + yIntOffsL); | 147 | | | 148 | 76.3M | padbuf[x+extra_left + (y+extra_top)*(MAX_CU_SIZE+16)] = ref[ xA + yA*ref_stride ]; | 149 | 76.3M | } | 150 | 4.79M | } | 151 | | | 152 | 343k | src_ptr = &padbuf[extra_top*(MAX_CU_SIZE+16) + extra_left]; | 153 | 343k | src_stride = MAX_CU_SIZE+16; | 154 | 343k | } | 155 | | | 156 | 625k | ctx->acceleration.put_hevc_qpel(out, out_stride, | 157 | 625k | src_ptr, src_stride /* sizeof(pixel_t) */, | 158 | 625k | nPbW,nPbH, mcbuffer, xFracL,yFracL, bitDepth_L); | 159 | | | 160 | | | 161 | 
625k | logtrace(LogMotion,"---V---\n"); | 162 | 6.40M | for (int y=0;y<nPbH;y++) { | 163 | 80.0M | for (int x=0;x<nPbW;x++) { | 164 | 74.3M | logtrace(LogMotion,"%04x ",out[x+y*out_stride]); | 165 | 74.3M | } | 166 | 5.77M | logtrace(LogMotion,"\n"); | 167 | 5.77M | } | 168 | 625k | } | 169 | 1.54M | } |
void mc_luma<unsigned char>(base_context const*, seq_parameter_set const*, int, int, int, int, short*, int, unsigned char const*, int, int, int, int) Line | Count | Source | 55 | 2.05M | { | 56 | 2.05M | int xFracL = mv_x & 3; | 57 | 2.05M | int yFracL = mv_y & 3; | 58 | | | 59 | 2.05M | int xIntOffsL = xP + (mv_x>>2); | 60 | 2.05M | int yIntOffsL = yP + (mv_y>>2); | 61 | | | 62 | | // luma sample interpolation process (8.5.3.2.2.1) | 63 | | | 64 | | //const int shift1 = sps->BitDepth_Y-8; | 65 | | //const int shift2 = 6; | 66 | 2.05M | const int shift3 = 14 - sps->BitDepth_Y; | 67 | | | 68 | 2.05M | int w = sps->pic_width_in_luma_samples; | 69 | 2.05M | int h = sps->pic_height_in_luma_samples; | 70 | | | 71 | 2.05M | ALIGNED_16(int16_t) mcbuffer[MAX_CU_SIZE * (MAX_CU_SIZE+7)]; | 72 | | | 73 | 2.05M | if (xFracL==0 && yFracL==0) { | 74 | | | 75 | 1.18M | if (xIntOffsL >= 0 && yIntOffsL >= 0 && | 76 | 1.18M | nPbW+xIntOffsL <= w && nPbH+yIntOffsL <= h) { | 77 | | | 78 | 1.15M | ctx->acceleration.put_hevc_qpel(out, out_stride, | 79 | 1.15M | &ref[yIntOffsL*ref_stride + xIntOffsL], | 80 | 1.15M | ref_stride /* sizeof(pixel_t)*/, | 81 | 1.15M | nPbW,nPbH, mcbuffer, 0,0, bitDepth_L); | 82 | 1.15M | } | 83 | 27.2k | else { | 84 | 257k | for (int y=0;y<nPbH;y++) | 85 | 2.61M | for (int x=0;x<nPbW;x++) { | 86 | | | 87 | 2.38M | int xA = Clip3(0,w-1,x + xIntOffsL); | 88 | 2.38M | int yA = Clip3(0,h-1,y + yIntOffsL); | 89 | | | 90 | 2.38M | out[y*out_stride+x] = ref[ xA + yA*ref_stride ] << shift3; | 91 | 2.38M | } | 92 | 27.2k | } | 93 | | | 94 | | #ifdef DE265_LOG_TRACE | 95 | | logtrace(LogMotion,"---MC luma %d %d = direct---\n",xFracL,yFracL); | 96 | | | 97 | | for (int y=0;y<nPbH;y++) { | 98 | | for (int x=0;x<nPbW;x++) { | 99 | | | 100 | | int xA = Clip3(0,w-1,x + xIntOffsL); | 101 | | int yA = Clip3(0,h-1,y + yIntOffsL); | 102 | | | 103 | | logtrace(LogMotion,"%02x ", ref[ xA + yA*ref_stride ]); | 104 | | } | 105 | | logtrace(LogMotion,"\n"); | 106 | | } | 107 | | | 
108 | | logtrace(LogMotion," -> \n"); | 109 | | | 110 | | for (int y=0;y<nPbH;y++) { | 111 | | for (int x=0;x<nPbW;x++) { | 112 | | | 113 | | logtrace(LogMotion,"%02x ",out[y*out_stride+x] >> 6); // 6 will be used when summing predictions | 114 | | } | 115 | | logtrace(LogMotion,"\n"); | 116 | | } | 117 | | #endif | 118 | 1.18M | } | 119 | 867k | else { | 120 | 867k | int extra_left = extra_before[xFracL]; | 121 | 867k | int extra_right = extra_after [xFracL]; | 122 | 867k | int extra_top = extra_before[yFracL]; | 123 | 867k | int extra_bottom = extra_after [yFracL]; | 124 | | | 125 | | //int nPbW_extra = extra_left + nPbW + extra_right; | 126 | | //int nPbH_extra = extra_top + nPbH + extra_bottom; | 127 | | | 128 | | | 129 | 867k | pixel_t padbuf[(MAX_CU_SIZE+16)*(MAX_CU_SIZE+7)]; | 130 | | | 131 | 867k | const pixel_t* src_ptr; | 132 | 867k | int src_stride; | 133 | | | 134 | 867k | if (-extra_left + xIntOffsL >= 0 && | 135 | 867k | -extra_top + yIntOffsL >= 0 && | 136 | 867k | nPbW+extra_right + xIntOffsL < w && | 137 | 867k | nPbH+extra_bottom + yIntOffsL < h) { | 138 | 628k | src_ptr = &ref[xIntOffsL + yIntOffsL*ref_stride]; | 139 | 628k | src_stride = ref_stride; | 140 | 628k | } | 141 | 238k | else { | 142 | 3.92M | for (int y=-extra_top;y<nPbH+extra_bottom;y++) { | 143 | 66.4M | for (int x=-extra_left;x<nPbW+extra_right;x++) { | 144 | | | 145 | 62.7M | int xA = Clip3(0,w-1,x + xIntOffsL); | 146 | 62.7M | int yA = Clip3(0,h-1,y + yIntOffsL); | 147 | | | 148 | 62.7M | padbuf[x+extra_left + (y+extra_top)*(MAX_CU_SIZE+16)] = ref[ xA + yA*ref_stride ]; | 149 | 62.7M | } | 150 | 3.68M | } | 151 | | | 152 | 238k | src_ptr = &padbuf[extra_top*(MAX_CU_SIZE+16) + extra_left]; | 153 | 238k | src_stride = MAX_CU_SIZE+16; | 154 | 238k | } | 155 | | | 156 | 867k | ctx->acceleration.put_hevc_qpel(out, out_stride, | 157 | 867k | src_ptr, src_stride /* sizeof(pixel_t) */, | 158 | 867k | nPbW,nPbH, mcbuffer, xFracL,yFracL, bitDepth_L); | 159 | | | 160 | | | 161 | 867k | 
logtrace(LogMotion,"---V---\n"); | 162 | 9.00M | for (int y=0;y<nPbH;y++) { | 163 | 107M | for (int x=0;x<nPbW;x++) { | 164 | 99.7M | logtrace(LogMotion,"%04x ",out[x+y*out_stride]); | 165 | 99.7M | } | 166 | 8.13M | logtrace(LogMotion,"\n"); | 167 | 8.13M | } | 168 | 867k | } | 169 | 2.05M | } |
|
170 | | |
171 | | |
172 | | |
173 | | template <class pixel_t> |
174 | | void mc_chroma(const base_context* ctx, |
175 | | const seq_parameter_set* sps, |
176 | | int mv_x, int mv_y, |
177 | | int xP,int yP, |
178 | | int16_t* out, int out_stride, |
179 | | const pixel_t* ref, int ref_stride, |
180 | | int nPbWC, int nPbHC, int bit_depth_C) |
181 | 6.62M | { |
182 | | // chroma sample interpolation process (8.5.3.2.2.2) |
183 | | |
184 | | //const int shift1 = sps->BitDepth_C-8; |
185 | | //const int shift2 = 6; |
186 | 6.62M | const int shift3 = 14 - sps->BitDepth_C; |
187 | | |
188 | 6.62M | int wC = sps->pic_width_in_luma_samples /sps->SubWidthC; |
189 | 6.62M | int hC = sps->pic_height_in_luma_samples/sps->SubHeightC; |
190 | | |
191 | 6.62M | mv_x *= 2 / sps->SubWidthC; |
192 | 6.62M | mv_y *= 2 / sps->SubHeightC; |
193 | | |
194 | 6.62M | int xFracC = mv_x & 7; |
195 | 6.62M | int yFracC = mv_y & 7; |
196 | | |
197 | 6.62M | int xIntOffsC = xP/sps->SubWidthC + (mv_x>>3); |
198 | 6.62M | int yIntOffsC = yP/sps->SubHeightC + (mv_y>>3); |
199 | | |
200 | 6.62M | ALIGNED_32(int16_t mcbuffer[MAX_CU_SIZE*(MAX_CU_SIZE+7)]); |
201 | | |
202 | 6.62M | if (xFracC == 0 && yFracC == 0) { |
203 | 3.75M | if (xIntOffsC>=0 && nPbWC+xIntOffsC<=wC && |
204 | 3.75M | yIntOffsC>=0 && nPbHC+yIntOffsC<=hC) { |
205 | 3.67M | ctx->acceleration.put_hevc_epel(out, out_stride, |
206 | 3.67M | &ref[xIntOffsC + yIntOffsC*ref_stride], ref_stride, |
207 | 3.67M | nPbWC,nPbHC, 0,0, NULL, bit_depth_C); |
208 | 3.67M | } |
209 | 73.4k | else |
210 | 73.4k | { |
211 | 474k | for (int y=0;y<nPbHC;y++) |
212 | 3.24M | for (int x=0;x<nPbWC;x++) { |
213 | | |
214 | 2.84M | int xB = Clip3(0,wC-1,x + xIntOffsC); |
215 | 2.84M | int yB = Clip3(0,hC-1,y + yIntOffsC); |
216 | | |
217 | 2.84M | out[y*out_stride+x] = ref[ xB + yB*ref_stride ] << shift3; |
218 | 2.84M | } |
219 | 73.4k | } |
220 | 3.75M | } |
221 | 2.87M | else { |
222 | 2.87M | pixel_t padbuf[(MAX_CU_SIZE+16)*(MAX_CU_SIZE+3)]; |
223 | | |
224 | 2.87M | const pixel_t* src_ptr; |
225 | 2.87M | int src_stride; |
226 | | |
227 | 2.87M | int extra_top = 1; |
228 | 2.87M | int extra_left = 1; |
229 | 2.87M | int extra_right = 2; |
230 | 2.87M | int extra_bottom = 2; |
231 | | |
232 | 2.87M | if (xIntOffsC>=1 && nPbWC+xIntOffsC<=wC-2 && |
233 | 2.87M | yIntOffsC>=1 && nPbHC+yIntOffsC<=hC-2) { |
234 | 1.81M | src_ptr = &ref[xIntOffsC + yIntOffsC*ref_stride]; |
235 | 1.81M | src_stride = ref_stride; |
236 | 1.81M | } |
237 | 1.06M | else { |
238 | 10.2M | for (int y=-extra_top;y<nPbHC+extra_bottom;y++) { |
239 | 104M | for (int x=-extra_left;x<nPbWC+extra_right;x++) { |
240 | | |
241 | 95.4M | int xA = Clip3(0,wC-1,x + xIntOffsC); |
242 | 95.4M | int yA = Clip3(0,hC-1,y + yIntOffsC); |
243 | | |
244 | 95.4M | padbuf[x+extra_left + (y+extra_top)*(MAX_CU_SIZE+16)] = ref[ xA + yA*ref_stride ]; |
245 | 95.4M | } |
246 | 9.22M | } |
247 | | |
248 | 1.06M | src_ptr = &padbuf[extra_left + extra_top*(MAX_CU_SIZE+16)]; |
249 | 1.06M | src_stride = MAX_CU_SIZE+16; |
250 | 1.06M | } |
251 | | |
252 | | |
253 | 2.87M | if (xFracC && yFracC) { |
254 | 1.75M | ctx->acceleration.put_hevc_epel_hv(out, out_stride, |
255 | 1.75M | src_ptr, src_stride, |
256 | 1.75M | nPbWC,nPbHC, xFracC,yFracC, mcbuffer, bit_depth_C); |
257 | 1.75M | } |
258 | 1.12M | else if (xFracC) { |
259 | 529k | ctx->acceleration.put_hevc_epel_h(out, out_stride, |
260 | 529k | src_ptr, src_stride, |
261 | 529k | nPbWC,nPbHC, xFracC,yFracC, mcbuffer, bit_depth_C); |
262 | 529k | } |
263 | 594k | else if (yFracC) { |
264 | 594k | ctx->acceleration.put_hevc_epel_v(out, out_stride, |
265 | 594k | src_ptr, src_stride, |
266 | 594k | nPbWC,nPbHC, xFracC,yFracC, mcbuffer, bit_depth_C); |
267 | 594k | } |
268 | 0 | else { |
269 | 0 | assert(false); // full-pel shifts are handled above |
270 | 0 | } |
271 | 2.87M | } |
272 | 6.62M | } void mc_chroma<unsigned short>(base_context const*, seq_parameter_set const*, int, int, int, int, short*, int, unsigned short const*, int, int, int, int) Line | Count | Source | 181 | 2.53M | { | 182 | | // chroma sample interpolation process (8.5.3.2.2.2) | 183 | | | 184 | | //const int shift1 = sps->BitDepth_C-8; | 185 | | //const int shift2 = 6; | 186 | 2.53M | const int shift3 = 14 - sps->BitDepth_C; | 187 | | | 188 | 2.53M | int wC = sps->pic_width_in_luma_samples /sps->SubWidthC; | 189 | 2.53M | int hC = sps->pic_height_in_luma_samples/sps->SubHeightC; | 190 | | | 191 | 2.53M | mv_x *= 2 / sps->SubWidthC; | 192 | 2.53M | mv_y *= 2 / sps->SubHeightC; | 193 | | | 194 | 2.53M | int xFracC = mv_x & 7; | 195 | 2.53M | int yFracC = mv_y & 7; | 196 | | | 197 | 2.53M | int xIntOffsC = xP/sps->SubWidthC + (mv_x>>3); | 198 | 2.53M | int yIntOffsC = yP/sps->SubHeightC + (mv_y>>3); | 199 | | | 200 | 2.53M | ALIGNED_32(int16_t mcbuffer[MAX_CU_SIZE*(MAX_CU_SIZE+7)]); | 201 | | | 202 | 2.53M | if (xFracC == 0 && yFracC == 0) { | 203 | 1.36M | if (xIntOffsC>=0 && nPbWC+xIntOffsC<=wC && | 204 | 1.36M | yIntOffsC>=0 && nPbHC+yIntOffsC<=hC) { | 205 | 1.32M | ctx->acceleration.put_hevc_epel(out, out_stride, | 206 | 1.32M | &ref[xIntOffsC + yIntOffsC*ref_stride], ref_stride, | 207 | 1.32M | nPbWC,nPbHC, 0,0, NULL, bit_depth_C); | 208 | 1.32M | } | 209 | 42.5k | else | 210 | 42.5k | { | 211 | 255k | for (int y=0;y<nPbHC;y++) | 212 | 1.47M | for (int x=0;x<nPbWC;x++) { | 213 | | | 214 | 1.26M | int xB = Clip3(0,wC-1,x + xIntOffsC); | 215 | 1.26M | int yB = Clip3(0,hC-1,y + yIntOffsC); | 216 | | | 217 | 1.26M | out[y*out_stride+x] = ref[ xB + yB*ref_stride ] << shift3; | 218 | 1.26M | } | 219 | 42.5k | } | 220 | 1.36M | } | 221 | 1.17M | else { | 222 | 1.17M | pixel_t padbuf[(MAX_CU_SIZE+16)*(MAX_CU_SIZE+3)]; | 223 | | | 224 | 1.17M | const pixel_t* src_ptr; | 225 | 1.17M | int src_stride; | 226 | | | 227 | 1.17M | int extra_top = 1; | 228 | 1.17M | int extra_left = 
1; | 229 | 1.17M | int extra_right = 2; | 230 | 1.17M | int extra_bottom = 2; | 231 | | | 232 | 1.17M | if (xIntOffsC>=1 && nPbWC+xIntOffsC<=wC-2 && | 233 | 1.17M | yIntOffsC>=1 && nPbHC+yIntOffsC<=hC-2) { | 234 | 611k | src_ptr = &ref[xIntOffsC + yIntOffsC*ref_stride]; | 235 | 611k | src_stride = ref_stride; | 236 | 611k | } | 237 | 561k | else { | 238 | 4.95M | for (int y=-extra_top;y<nPbHC+extra_bottom;y++) { | 239 | 43.6M | for (int x=-extra_left;x<nPbWC+extra_right;x++) { | 240 | | | 241 | 39.2M | int xA = Clip3(0,wC-1,x + xIntOffsC); | 242 | 39.2M | int yA = Clip3(0,hC-1,y + yIntOffsC); | 243 | | | 244 | 39.2M | padbuf[x+extra_left + (y+extra_top)*(MAX_CU_SIZE+16)] = ref[ xA + yA*ref_stride ]; | 245 | 39.2M | } | 246 | 4.39M | } | 247 | | | 248 | 561k | src_ptr = &padbuf[extra_left + extra_top*(MAX_CU_SIZE+16)]; | 249 | 561k | src_stride = MAX_CU_SIZE+16; | 250 | 561k | } | 251 | | | 252 | | | 253 | 1.17M | if (xFracC && yFracC) { | 254 | 790k | ctx->acceleration.put_hevc_epel_hv(out, out_stride, | 255 | 790k | src_ptr, src_stride, | 256 | 790k | nPbWC,nPbHC, xFracC,yFracC, mcbuffer, bit_depth_C); | 257 | 790k | } | 258 | 382k | else if (xFracC) { | 259 | 169k | ctx->acceleration.put_hevc_epel_h(out, out_stride, | 260 | 169k | src_ptr, src_stride, | 261 | 169k | nPbWC,nPbHC, xFracC,yFracC, mcbuffer, bit_depth_C); | 262 | 169k | } | 263 | 212k | else if (yFracC) { | 264 | 212k | ctx->acceleration.put_hevc_epel_v(out, out_stride, | 265 | 212k | src_ptr, src_stride, | 266 | 212k | nPbWC,nPbHC, xFracC,yFracC, mcbuffer, bit_depth_C); | 267 | 212k | } | 268 | 0 | else { | 269 | 0 | assert(false); // full-pel shifts are handled above | 270 | 0 | } | 271 | 1.17M | } | 272 | 2.53M | } |
void mc_chroma<unsigned char>(base_context const*, seq_parameter_set const*, int, int, int, int, short*, int, unsigned char const*, int, int, int, int) Line | Count | Source | 181 | 4.09M | { | 182 | | // chroma sample interpolation process (8.5.3.2.2.2) | 183 | | | 184 | | //const int shift1 = sps->BitDepth_C-8; | 185 | | //const int shift2 = 6; | 186 | 4.09M | const int shift3 = 14 - sps->BitDepth_C; | 187 | | | 188 | 4.09M | int wC = sps->pic_width_in_luma_samples /sps->SubWidthC; | 189 | 4.09M | int hC = sps->pic_height_in_luma_samples/sps->SubHeightC; | 190 | | | 191 | 4.09M | mv_x *= 2 / sps->SubWidthC; | 192 | 4.09M | mv_y *= 2 / sps->SubHeightC; | 193 | | | 194 | 4.09M | int xFracC = mv_x & 7; | 195 | 4.09M | int yFracC = mv_y & 7; | 196 | | | 197 | 4.09M | int xIntOffsC = xP/sps->SubWidthC + (mv_x>>3); | 198 | 4.09M | int yIntOffsC = yP/sps->SubHeightC + (mv_y>>3); | 199 | | | 200 | 4.09M | ALIGNED_32(int16_t mcbuffer[MAX_CU_SIZE*(MAX_CU_SIZE+7)]); | 201 | | | 202 | 4.09M | if (xFracC == 0 && yFracC == 0) { | 203 | 2.38M | if (xIntOffsC>=0 && nPbWC+xIntOffsC<=wC && | 204 | 2.38M | yIntOffsC>=0 && nPbHC+yIntOffsC<=hC) { | 205 | 2.35M | ctx->acceleration.put_hevc_epel(out, out_stride, | 206 | 2.35M | &ref[xIntOffsC + yIntOffsC*ref_stride], ref_stride, | 207 | 2.35M | nPbWC,nPbHC, 0,0, NULL, bit_depth_C); | 208 | 2.35M | } | 209 | 30.8k | else | 210 | 30.8k | { | 211 | 218k | for (int y=0;y<nPbHC;y++) | 212 | 1.76M | for (int x=0;x<nPbWC;x++) { | 213 | | | 214 | 1.58M | int xB = Clip3(0,wC-1,x + xIntOffsC); | 215 | 1.58M | int yB = Clip3(0,hC-1,y + yIntOffsC); | 216 | | | 217 | 1.58M | out[y*out_stride+x] = ref[ xB + yB*ref_stride ] << shift3; | 218 | 1.58M | } | 219 | 30.8k | } | 220 | 2.38M | } | 221 | 1.70M | else { | 222 | 1.70M | pixel_t padbuf[(MAX_CU_SIZE+16)*(MAX_CU_SIZE+3)]; | 223 | | | 224 | 1.70M | const pixel_t* src_ptr; | 225 | 1.70M | int src_stride; | 226 | | | 227 | 1.70M | int extra_top = 1; | 228 | 1.70M | int extra_left = 1; | 229 | 1.70M | 
int extra_right = 2; | 230 | 1.70M | int extra_bottom = 2; | 231 | | | 232 | 1.70M | if (xIntOffsC>=1 && nPbWC+xIntOffsC<=wC-2 && | 233 | 1.70M | yIntOffsC>=1 && nPbHC+yIntOffsC<=hC-2) { | 234 | 1.19M | src_ptr = &ref[xIntOffsC + yIntOffsC*ref_stride]; | 235 | 1.19M | src_stride = ref_stride; | 236 | 1.19M | } | 237 | 507k | else { | 238 | 5.34M | for (int y=-extra_top;y<nPbHC+extra_bottom;y++) { | 239 | 61.0M | for (int x=-extra_left;x<nPbWC+extra_right;x++) { | 240 | | | 241 | 56.2M | int xA = Clip3(0,wC-1,x + xIntOffsC); | 242 | 56.2M | int yA = Clip3(0,hC-1,y + yIntOffsC); | 243 | | | 244 | 56.2M | padbuf[x+extra_left + (y+extra_top)*(MAX_CU_SIZE+16)] = ref[ xA + yA*ref_stride ]; | 245 | 56.2M | } | 246 | 4.83M | } | 247 | | | 248 | 507k | src_ptr = &padbuf[extra_left + extra_top*(MAX_CU_SIZE+16)]; | 249 | 507k | src_stride = MAX_CU_SIZE+16; | 250 | 507k | } | 251 | | | 252 | | | 253 | 1.70M | if (xFracC && yFracC) { | 254 | 964k | ctx->acceleration.put_hevc_epel_hv(out, out_stride, | 255 | 964k | src_ptr, src_stride, | 256 | 964k | nPbWC,nPbHC, xFracC,yFracC, mcbuffer, bit_depth_C); | 257 | 964k | } | 258 | 741k | else if (xFracC) { | 259 | 359k | ctx->acceleration.put_hevc_epel_h(out, out_stride, | 260 | 359k | src_ptr, src_stride, | 261 | 359k | nPbWC,nPbHC, xFracC,yFracC, mcbuffer, bit_depth_C); | 262 | 359k | } | 263 | 381k | else if (yFracC) { | 264 | 381k | ctx->acceleration.put_hevc_epel_v(out, out_stride, | 265 | 381k | src_ptr, src_stride, | 266 | 381k | nPbWC,nPbHC, xFracC,yFracC, mcbuffer, bit_depth_C); | 267 | 381k | } | 268 | 0 | else { | 269 | 0 | assert(false); // full-pel shifts are handled above | 270 | 0 | } | 271 | 1.70M | } | 272 | 4.09M | } |
|
273 | | |
274 | | |
275 | | |
276 | | // 8.5.3.2 |
277 | | // NOTE: for full-pel shifts, we can introduce a fast path, simply copying without shifts |
278 | | void generate_inter_prediction_samples(base_context* ctx, |
279 | | const slice_segment_header* shdr, |
280 | | de265_image* img, |
281 | | int xC,int yC, |
282 | | int xB,int yB, |
283 | | int nCS, int nPbW,int nPbH, |
284 | | const PBMotion* vi) |
285 | 2.76M | { |
286 | 2.76M | int xP = xC+xB; |
287 | 2.76M | int yP = yC+yB; |
288 | | |
289 | 2.76M | void* pixels[3]; |
290 | 2.76M | int stride[3]; |
291 | | |
292 | 2.76M | const pic_parameter_set* pps = shdr->pps.get(); |
293 | 2.76M | const seq_parameter_set* sps = pps->sps.get(); |
294 | | |
295 | 2.76M | if (sps->BitDepth_Y != img->get_bit_depth(0) || |
296 | 2.76M | sps->BitDepth_C != img->get_bit_depth(1)) { |
297 | 0 | img->integrity = INTEGRITY_DECODING_ERRORS; |
298 | 0 | ctx->add_warning(DE265_WARNING_BIT_DEPTH_OF_CURRENT_IMAGE_DOES_NOT_MATCH_SPS, false); |
299 | 0 | return; |
300 | 0 | } |
301 | | |
302 | 2.76M | if (sps->chroma_format_idc != img->get_chroma_format()) { |
303 | 0 | img->integrity = INTEGRITY_DECODING_ERRORS; |
304 | 0 | ctx->add_warning(DE265_WARNING_CHROMA_OF_CURRENT_IMAGE_DOES_NOT_MATCH_SPS, false); |
305 | 0 | return; |
306 | 0 | } |
307 | | |
308 | 2.76M | const int SubWidthC = sps->SubWidthC; |
309 | 2.76M | const int SubHeightC = sps->SubHeightC; |
310 | | |
311 | 2.76M | pixels[0] = img->get_image_plane_at_pos_any_depth(0,xP,yP); |
312 | 2.76M | stride[0] = img->get_image_stride(0); |
313 | | |
314 | 2.76M | pixels[1] = img->get_image_plane_at_pos_any_depth(1,xP/SubWidthC,yP/SubHeightC); |
315 | 2.76M | stride[1] = img->get_image_stride(1); |
316 | | |
317 | 2.76M | pixels[2] = img->get_image_plane_at_pos_any_depth(2,xP/SubWidthC,yP/SubHeightC); |
318 | 2.76M | stride[2] = img->get_image_stride(2); |
319 | | |
320 | | |
321 | 2.76M | ALIGNED_16(int16_t) predSamplesL [2 /* LX */][MAX_CU_SIZE* MAX_CU_SIZE]; |
322 | 2.76M | ALIGNED_16(int16_t) predSamplesC[2 /* chroma */ ][2 /* LX */][MAX_CU_SIZE* MAX_CU_SIZE]; |
323 | | |
324 | | //int xP = xC+xB; |
325 | | //int yP = yC+yB; |
326 | | |
327 | 2.76M | int predFlag[2]; |
328 | 2.76M | predFlag[0] = vi->predFlag[0]; |
329 | 2.76M | predFlag[1] = vi->predFlag[1]; |
330 | | |
331 | 2.76M | const int bit_depth_L = sps->BitDepth_Y; |
332 | 2.76M | const int bit_depth_C = sps->BitDepth_C; |
333 | | |
334 | | // Some encoders use bi-prediction with two similar MVs. |
335 | | // Identify this case and use only one MV. |
336 | | |
337 | | // do this only without weighted prediction, because the weights/offsets may be different |
338 | 2.76M | if (pps->weighted_pred_flag==0) { |
339 | 812k | if (predFlag[0] && predFlag[1]) { |
340 | 317k | if (vi->mv[0].x == vi->mv[1].x && |
341 | 317k | vi->mv[0].y == vi->mv[1].y && |
342 | 317k | shdr->RefPicList[0][vi->refIdx[0]] == |
343 | 238k | shdr->RefPicList[1][vi->refIdx[1]]) { |
344 | 178k | predFlag[1] = 0; |
345 | 178k | } |
346 | 317k | } |
347 | 812k | } |
348 | | |
349 | | |
350 | 8.28M | for (int l=0;l<2;l++) { |
351 | 5.52M | if (predFlag[l]) { |
352 | | // 8.5.3.2.1 |
353 | | |
354 | 3.59M | if (vi->refIdx[l] >= MAX_NUM_REF_PICS) { |
355 | 0 | img->integrity = INTEGRITY_DECODING_ERRORS; |
356 | 0 | ctx->add_warning(DE265_WARNING_NONEXISTING_REFERENCE_PICTURE_ACCESSED, false); |
357 | 0 | return; |
358 | 0 | } |
359 | | |
360 | 3.59M | const de265_image* refPic = ctx->get_image(shdr->RefPicList[l][vi->refIdx[l]]); |
361 | | |
362 | 3.59M | logtrace(LogMotion, "refIdx: %d -> dpb[%d]\n", vi->refIdx[l], shdr->RefPicList[l][vi->refIdx[l]]); |
363 | | |
364 | 3.59M | if (!refPic || refPic->PicState == UnusedForReference) { |
365 | 0 | img->integrity = INTEGRITY_DECODING_ERRORS; |
366 | 0 | ctx->add_warning(DE265_WARNING_NONEXISTING_REFERENCE_PICTURE_ACCESSED, false); |
367 | | |
368 | | // TODO: fill predSamplesC with black or grey |
369 | 0 | } |
370 | 3.59M | else if (refPic->get_width(0) != sps->pic_width_in_luma_samples || |
371 | 3.59M | refPic->get_height(0) != sps->pic_height_in_luma_samples || |
372 | 3.59M | img->get_chroma_format() != refPic->get_chroma_format()) { |
373 | 0 | img->integrity = INTEGRITY_DECODING_ERRORS; |
374 | 0 | ctx->add_warning(DE265_WARNING_REFERENCE_IMAGE_SIZE_DOES_NOT_MATCH_SPS, false); |
375 | 0 | } |
376 | 3.59M | else if (img->get_bit_depth(0) != refPic->get_bit_depth(0) || |
377 | 3.59M | img->get_bit_depth(1) != refPic->get_bit_depth(1)) { |
378 | 0 | img->integrity = INTEGRITY_DECODING_ERRORS; |
379 | 0 | ctx->add_warning(DE265_WARNING_REFERENCE_IMAGE_BIT_DEPTH_DOES_NOT_MATCH, false); |
380 | 0 | } |
381 | 3.59M | else if (img->get_chroma_format() != refPic->get_chroma_format()) { |
382 | 0 | img->integrity = INTEGRITY_DECODING_ERRORS; |
383 | 0 | ctx->add_warning(DE265_WARNING_REFERENCE_IMAGE_CHROMA_FORMAT_DOES_NOT_MATCH, false); |
384 | 0 | } |
385 | 3.59M | else { |
386 | | // 8.5.3.2.2 |
387 | | |
388 | 3.59M | logtrace(LogMotion,"do MC: L%d,MV=%d;%d RefPOC=%d\n", |
389 | 3.59M | l,vi->mv[l].x,vi->mv[l].y,refPic->PicOrderCntVal); |
390 | | |
391 | | |
392 | | // TODO: must predSamples stride really be nCS or can it be something smaller like nPbW? |
393 | | |
394 | 3.59M | if (img->high_bit_depth(0)) { |
395 | 1.54M | mc_luma(ctx, sps, vi->mv[l].x, vi->mv[l].y, xP,yP, |
396 | 1.54M | predSamplesL[l],nCS, |
397 | 1.54M | (const uint16_t*)refPic->get_image_plane(0), |
398 | 1.54M | refPic->get_luma_stride(), nPbW,nPbH, bit_depth_L); |
399 | 1.54M | } |
400 | 2.05M | else { |
401 | 2.05M | mc_luma(ctx, sps, vi->mv[l].x, vi->mv[l].y, xP,yP, |
402 | 2.05M | predSamplesL[l],nCS, |
403 | 2.05M | (const uint8_t*)refPic->get_image_plane(0), |
404 | 2.05M | refPic->get_luma_stride(), nPbW,nPbH, bit_depth_L); |
405 | 2.05M | } |
406 | | |
407 | 3.59M | if (img->get_chroma_format() != de265_chroma_mono) { |
408 | 3.31M | if (img->high_bit_depth(1)) { |
409 | 1.26M | mc_chroma(ctx, sps, vi->mv[l].x, vi->mv[l].y, xP, yP, |
410 | 1.26M | predSamplesC[0][l], nCS, (const uint16_t*) refPic->get_image_plane(1), |
411 | 1.26M | refPic->get_chroma_stride(), nPbW / SubWidthC, nPbH / SubHeightC, bit_depth_C); |
412 | 1.26M | mc_chroma(ctx, sps, vi->mv[l].x, vi->mv[l].y, xP, yP, |
413 | 1.26M | predSamplesC[1][l], nCS, (const uint16_t*) refPic->get_image_plane(2), |
414 | 1.26M | refPic->get_chroma_stride(), nPbW / SubWidthC, nPbH / SubHeightC, bit_depth_C); |
415 | 1.26M | } |
416 | 2.04M | else { |
417 | 2.04M | mc_chroma(ctx, sps, vi->mv[l].x, vi->mv[l].y, xP, yP, |
418 | 2.04M | predSamplesC[0][l], nCS, (const uint8_t*) refPic->get_image_plane(1), |
419 | 2.04M | refPic->get_chroma_stride(), nPbW / SubWidthC, nPbH / SubHeightC, bit_depth_C); |
420 | 2.04M | mc_chroma(ctx, sps, vi->mv[l].x, vi->mv[l].y, xP, yP, |
421 | 2.04M | predSamplesC[1][l], nCS, (const uint8_t*) refPic->get_image_plane(2), |
422 | 2.04M | refPic->get_chroma_stride(), nPbW / SubWidthC, nPbH / SubHeightC, bit_depth_C); |
423 | 2.04M | } |
424 | 3.31M | } |
425 | 3.59M | } |
426 | 3.59M | } |
427 | 5.52M | } |
428 | | |
429 | | |
430 | | // weighted sample prediction (8.5.3.2.3) |
431 | | |
432 | 2.76M | const int shift1_L = libde265_max(2,14-sps->BitDepth_Y); |
433 | 2.76M | const int offset_shift1_L = img->get_sps().WpOffsetBdShiftY; |
434 | 2.76M | const int shift1_C = libde265_max(2,14-sps->BitDepth_C); |
435 | 2.76M | const int offset_shift1_C = img->get_sps().WpOffsetBdShiftC; |
436 | | |
437 | | /* |
438 | | const int shift1_L = 14-img->sps.BitDepth_Y; |
439 | | const int offset_shift1_L = img->sps.BitDepth_Y-8; |
440 | | const int shift1_C = 14-img->sps.BitDepth_C; |
441 | | const int offset_shift1_C = img->sps.BitDepth_C-8; |
442 | | */ |
443 | | |
444 | | /* |
445 | | if (0) |
446 | | printf("%d/%d %d/%d %d/%d %d/%d\n", |
447 | | shift1_L, |
448 | | Nshift1_L, |
449 | | offset_shift1_L, |
450 | | Noffset_shift1_L, |
451 | | shift1_C, |
452 | | Nshift1_C, |
453 | | offset_shift1_C, |
454 | | Noffset_shift1_C); |
455 | | |
456 | | assert(shift1_L== |
457 | | Nshift1_L); |
458 | | assert(offset_shift1_L== |
459 | | Noffset_shift1_L); |
460 | | assert(shift1_C== |
461 | | Nshift1_C); |
462 | | assert(offset_shift1_C== |
463 | | Noffset_shift1_C); |
464 | | */ |
465 | | |
466 | | |
467 | 2.76M | logtrace(LogMotion,"predFlags (modified): %d %d\n", predFlag[0], predFlag[1]); |
468 | | |
469 | 2.76M | if (shdr->slice_type == SLICE_TYPE_P) { |
470 | 298k | if (pps->weighted_pred_flag==0) { |
471 | 143k | if (predFlag[0]==1 && predFlag[1]==0) { |
472 | 143k | ctx->acceleration.put_unweighted_pred(pixels[0], stride[0], |
473 | 143k | predSamplesL[0],nCS, nPbW,nPbH, bit_depth_L); |
474 | | |
475 | 143k | if (img->get_chroma_format() != de265_chroma_mono) { |
476 | 122k | ctx->acceleration.put_unweighted_pred(pixels[1], stride[1], |
477 | 122k | predSamplesC[0][0], nCS, |
478 | 122k | nPbW / SubWidthC, nPbH / SubHeightC, bit_depth_C); |
479 | 122k | ctx->acceleration.put_unweighted_pred(pixels[2], stride[2], |
480 | 122k | predSamplesC[1][0], nCS, |
481 | 122k | nPbW / SubWidthC, nPbH / SubHeightC, bit_depth_C); |
482 | 122k | } |
483 | 143k | } |
484 | 0 | else { |
485 | 0 | ctx->add_warning(DE265_WARNING_BOTH_PREDFLAGS_ZERO, false); |
486 | 0 | img->integrity = INTEGRITY_DECODING_ERRORS; |
487 | 0 | } |
488 | 143k | } |
489 | 154k | else { |
490 | | // weighted prediction |
491 | | |
492 | 154k | if (predFlag[0]==1 && predFlag[1]==0) { |
493 | | |
494 | 154k | int refIdx0 = vi->refIdx[0]; |
495 | | |
496 | 154k | int luma_log2WD = shdr->luma_log2_weight_denom + shift1_L; |
497 | 154k | int chroma_log2WD = shdr->ChromaLog2WeightDenom + shift1_C; |
498 | | |
499 | 154k | int luma_w0 = shdr->LumaWeight[0][refIdx0]; |
500 | 154k | int luma_o0 = shdr->luma_offset[0][refIdx0] * (1<<(offset_shift1_L)); |
501 | | |
502 | 154k | int chroma0_w0 = shdr->ChromaWeight[0][refIdx0][0]; |
503 | 154k | int chroma0_o0 = shdr->ChromaOffset[0][refIdx0][0] * (1<<(offset_shift1_C)); |
504 | 154k | int chroma1_w0 = shdr->ChromaWeight[0][refIdx0][1]; |
505 | 154k | int chroma1_o0 = shdr->ChromaOffset[0][refIdx0][1] * (1<<(offset_shift1_C)); |
506 | | |
507 | 154k | logtrace(LogMotion,"weighted-0 [%d] %d %d %d %dx%d\n", refIdx0, luma_log2WD-6,luma_w0,luma_o0,nPbW,nPbH); |
508 | | |
509 | 154k | ctx->acceleration.put_weighted_pred(pixels[0], stride[0], |
510 | 154k | predSamplesL[0],nCS, nPbW,nPbH, |
511 | 154k | luma_w0, luma_o0, luma_log2WD, bit_depth_L); |
512 | 154k | if (img->get_chroma_format() != de265_chroma_mono) { |
513 | 130k | ctx->acceleration.put_weighted_pred(pixels[1], stride[1], |
514 | 130k | predSamplesC[0][0], nCS, nPbW / SubWidthC, nPbH / SubHeightC, |
515 | 130k | chroma0_w0, chroma0_o0, chroma_log2WD, bit_depth_C); |
516 | 130k | ctx->acceleration.put_weighted_pred(pixels[2], stride[2], |
517 | 130k | predSamplesC[1][0], nCS, nPbW / SubWidthC, nPbH / SubHeightC, |
518 | 130k | chroma1_w0, chroma1_o0, chroma_log2WD, bit_depth_C); |
519 | 130k | } |
520 | 154k | } |
521 | 0 | else { |
522 | 0 | ctx->add_warning(DE265_WARNING_BOTH_PREDFLAGS_ZERO, false); |
523 | 0 | img->integrity = INTEGRITY_DECODING_ERRORS; |
524 | 0 | } |
525 | 154k | } |
526 | 298k | } |
527 | 2.46M | else { |
528 | 2.46M | assert(shdr->slice_type == SLICE_TYPE_B); |
529 | | |
530 | 2.46M | if (predFlag[0]==1 && predFlag[1]==1) { |
531 | 832k | if (pps->weighted_bipred_flag==0) { |
532 | | //const int shift2 = 15-8; // TODO: real bit depth |
533 | | //const int offset2 = 1<<(shift2-1); |
534 | | |
535 | 515k | int16_t* in0 = predSamplesL[0]; |
536 | 515k | int16_t* in1 = predSamplesL[1]; |
537 | | |
538 | 515k | ctx->acceleration.put_weighted_pred_avg(pixels[0], stride[0], |
539 | 515k | in0,in1, nCS, nPbW, nPbH, bit_depth_L); |
540 | | |
541 | 515k | int16_t* in00 = predSamplesC[0][0]; |
542 | 515k | int16_t* in01 = predSamplesC[0][1]; |
543 | 515k | int16_t* in10 = predSamplesC[1][0]; |
544 | 515k | int16_t* in11 = predSamplesC[1][1]; |
545 | | |
546 | 515k | if (img->get_chroma_format() != de265_chroma_mono) { |
547 | 472k | ctx->acceleration.put_weighted_pred_avg(pixels[1], stride[1], |
548 | 472k | in00, in01, nCS, |
549 | 472k | nPbW / SubWidthC, nPbH / SubHeightC, bit_depth_C); |
550 | 472k | ctx->acceleration.put_weighted_pred_avg(pixels[2], stride[2], |
551 | 472k | in10, in11, nCS, |
552 | 472k | nPbW / SubWidthC, nPbH / SubHeightC, bit_depth_C); |
553 | 472k | } |
554 | 515k | } |
555 | 317k | else { |
556 | | // weighted prediction |
557 | | |
558 | 317k | int refIdx0 = vi->refIdx[0]; |
559 | 317k | int refIdx1 = vi->refIdx[1]; |
560 | | |
561 | 317k | int luma_log2WD = shdr->luma_log2_weight_denom + shift1_L; |
562 | 317k | int chroma_log2WD = shdr->ChromaLog2WeightDenom + shift1_C; |
563 | | |
564 | 317k | int luma_w0 = shdr->LumaWeight[0][refIdx0]; |
565 | 317k | int luma_o0 = shdr->luma_offset[0][refIdx0] * (1<<(offset_shift1_L)); |
566 | 317k | int luma_w1 = shdr->LumaWeight[1][refIdx1]; |
567 | 317k | int luma_o1 = shdr->luma_offset[1][refIdx1] * (1<<(offset_shift1_L)); |
568 | | |
569 | 317k | int chroma0_w0 = shdr->ChromaWeight[0][refIdx0][0]; |
570 | 317k | int chroma0_o0 = shdr->ChromaOffset[0][refIdx0][0] * (1<<(offset_shift1_C)); |
571 | 317k | int chroma1_w0 = shdr->ChromaWeight[0][refIdx0][1]; |
572 | 317k | int chroma1_o0 = shdr->ChromaOffset[0][refIdx0][1] * (1<<(offset_shift1_C)); |
573 | 317k | int chroma0_w1 = shdr->ChromaWeight[1][refIdx1][0]; |
574 | 317k | int chroma0_o1 = shdr->ChromaOffset[1][refIdx1][0] * (1<<(offset_shift1_C)); |
575 | 317k | int chroma1_w1 = shdr->ChromaWeight[1][refIdx1][1]; |
576 | 317k | int chroma1_o1 = shdr->ChromaOffset[1][refIdx1][1] * (1<<(offset_shift1_C)); |
577 | | |
578 | 317k | logtrace(LogMotion,"weighted-BI-0 [%d] %d %d %d %dx%d\n", refIdx0, luma_log2WD-6,luma_w0,luma_o0,nPbW,nPbH); |
579 | 317k | logtrace(LogMotion,"weighted-BI-1 [%d] %d %d %d %dx%d\n", refIdx1, luma_log2WD-6,luma_w1,luma_o1,nPbW,nPbH); |
580 | | |
581 | 317k | int16_t* in0 = predSamplesL[0]; |
582 | 317k | int16_t* in1 = predSamplesL[1]; |
583 | | |
584 | 317k | ctx->acceleration.put_weighted_bipred(pixels[0], stride[0], |
585 | 317k | in0,in1, nCS, nPbW, nPbH, |
586 | 317k | luma_w0,luma_o0, |
587 | 317k | luma_w1,luma_o1, |
588 | 317k | luma_log2WD, bit_depth_L); |
589 | | |
590 | 317k | int16_t* in00 = predSamplesC[0][0]; |
591 | 317k | int16_t* in01 = predSamplesC[0][1]; |
592 | 317k | int16_t* in10 = predSamplesC[1][0]; |
593 | 317k | int16_t* in11 = predSamplesC[1][1]; |
594 | | |
595 | 317k | if (img->get_chroma_format() != de265_chroma_mono) { |
596 | 294k | ctx->acceleration.put_weighted_bipred(pixels[1], stride[1], |
597 | 294k | in00, in01, nCS, nPbW / SubWidthC, nPbH / SubHeightC, |
598 | 294k | chroma0_w0, chroma0_o0, |
599 | 294k | chroma0_w1, chroma0_o1, |
600 | 294k | chroma_log2WD, bit_depth_C); |
601 | 294k | ctx->acceleration.put_weighted_bipred(pixels[2], stride[2], |
602 | 294k | in10, in11, nCS, nPbW / SubWidthC, nPbH / SubHeightC, |
603 | 294k | chroma1_w0, chroma1_o0, |
604 | 294k | chroma1_w1, chroma1_o1, |
605 | 294k | chroma_log2WD, bit_depth_C); |
606 | 294k | } |
607 | 317k | } |
608 | 832k | } |
609 | 1.62M | else if (predFlag[0]==1 || predFlag[1]==1) { |
610 | 1.62M | int l = predFlag[0] ? 0 : 1; |
611 | | |
612 | 1.62M | if (pps->weighted_bipred_flag==0) { |
613 | 987k | ctx->acceleration.put_unweighted_pred(pixels[0], stride[0], |
614 | 987k | predSamplesL[l],nCS, nPbW,nPbH, bit_depth_L); |
615 | | |
616 | 987k | if (img->get_chroma_format() != de265_chroma_mono) { |
617 | 922k | ctx->acceleration.put_unweighted_pred(pixels[1], stride[1], |
618 | 922k | predSamplesC[0][l], nCS, |
619 | 922k | nPbW / SubWidthC, nPbH / SubHeightC, bit_depth_C); |
620 | 922k | ctx->acceleration.put_unweighted_pred(pixels[2], stride[2], |
621 | 922k | predSamplesC[1][l], nCS, |
622 | 922k | nPbW / SubWidthC, nPbH / SubHeightC, bit_depth_C); |
623 | 922k | } |
624 | 987k | } |
625 | 642k | else { |
626 | 642k | int refIdx = vi->refIdx[l]; |
627 | | |
628 | 642k | int luma_log2WD = shdr->luma_log2_weight_denom + shift1_L; |
629 | 642k | int chroma_log2WD = shdr->ChromaLog2WeightDenom + shift1_C; |
630 | | |
631 | 642k | int luma_w = shdr->LumaWeight[l][refIdx]; |
632 | 642k | int luma_o = shdr->luma_offset[l][refIdx] * (1<<(offset_shift1_L)); |
633 | | |
634 | 642k | int chroma0_w = shdr->ChromaWeight[l][refIdx][0]; |
635 | 642k | int chroma0_o = shdr->ChromaOffset[l][refIdx][0] * (1<<(offset_shift1_C)); |
636 | 642k | int chroma1_w = shdr->ChromaWeight[l][refIdx][1]; |
637 | 642k | int chroma1_o = shdr->ChromaOffset[l][refIdx][1] * (1<<(offset_shift1_C)); |
638 | | |
639 | 642k | logtrace(LogMotion,"weighted-B-L%d [%d] %d %d %d %dx%d\n", l, refIdx, luma_log2WD-6,luma_w,luma_o,nPbW,nPbH); |
640 | | |
641 | 642k | ctx->acceleration.put_weighted_pred(pixels[0], stride[0], |
642 | 642k | predSamplesL[l],nCS, nPbW,nPbH, |
643 | 642k | luma_w, luma_o, luma_log2WD, bit_depth_L); |
644 | | |
645 | 642k | if (img->get_chroma_format() != de265_chroma_mono) { |
646 | 605k | ctx->acceleration.put_weighted_pred(pixels[1], stride[1], |
647 | 605k | predSamplesC[0][l], nCS, |
648 | 605k | nPbW / SubWidthC, nPbH / SubHeightC, |
649 | 605k | chroma0_w, chroma0_o, chroma_log2WD, bit_depth_C); |
650 | 605k | ctx->acceleration.put_weighted_pred(pixels[2], stride[2], |
651 | 605k | predSamplesC[1][l], nCS, |
652 | 605k | nPbW / SubWidthC, nPbH / SubHeightC, |
653 | 605k | chroma1_w, chroma1_o, chroma_log2WD, bit_depth_C); |
654 | 605k | } |
655 | 642k | } |
656 | 1.62M | } |
657 | 0 | else { |
658 | | // TODO: check why it can actually happen that both predFlags[] are false. |
659 | | // For now, we ignore this and continue decoding. |
660 | |
|
661 | 0 | ctx->add_warning(DE265_WARNING_BOTH_PREDFLAGS_ZERO, false); |
662 | 0 | img->integrity = INTEGRITY_DECODING_ERRORS; |
663 | 0 | } |
664 | 2.46M | } |
665 | | |
666 | | #if defined(DE265_LOG_TRACE) && 0 |
667 | | logtrace(LogTransform,"MC pixels (luma), position %d %d:\n", xP,yP); |
668 | | |
669 | | for (int y=0;y<nPbH;y++) { |
670 | | logtrace(LogTransform,"MC-y-%d-%d ",xP,yP+y); |
671 | | |
672 | | for (int x=0;x<nPbW;x++) { |
673 | | logtrace(LogTransform,"*%02x ", pixels[0][x+y*stride[0]]); |
674 | | } |
675 | | |
676 | | logtrace(LogTransform,"*\n"); |
677 | | } |
678 | | |
679 | | |
680 | | logtrace(LogTransform,"MC pixels (chroma cb), position %d %d:\n", xP/2,yP/2); |
681 | | |
682 | | for (int y=0;y<nPbH/2;y++) { |
683 | | logtrace(LogTransform,"MC-cb-%d-%d ",xP/2,yP/2+y); |
684 | | |
685 | | for (int x=0;x<nPbW/2;x++) { |
686 | | logtrace(LogTransform,"*%02x ", pixels[1][x+y*stride[1]]); |
687 | | } |
688 | | |
689 | | logtrace(LogTransform,"*\n"); |
690 | | } |
691 | | |
692 | | |
693 | | logtrace(LogTransform,"MC pixels (chroma cr), position %d %d:\n", xP/2,yP/2); |
694 | | |
695 | | for (int y=0;y<nPbH/2;y++) { |
696 | | logtrace(LogTransform,"MC-cr-%d-%d ",xP/2,yP/2+y); |
697 | | |
698 | | for (int x=0;x<nPbW/2;x++) { |
699 | | logtrace(LogTransform,"*%02x ", pixels[2][x+y*stride[2]]); |
700 | | } |
701 | | |
702 | | logtrace(LogTransform,"*\n"); |
703 | | } |
704 | | #endif |
705 | 2.76M | } |
706 | | |
707 | | |
#ifdef DE265_LOG_TRACE
// Trace output of one motion candidate: for each of the two prediction lists,
// print whether it is used and, if so, its motion vector and reference index.
void logmvcand(const PBMotion& p)
{
  int v = 0;

  while (v < 2) {
    if (!p.predFlag[v]) {
      // unused list: no vector / reference to show
      logtrace(LogMotion," %d: %s --;-- ref=--\n", v, "no ");
    }
    else {
      logtrace(LogMotion," %d: %s %d;%d ref=%d\n", v, "yes",
               p.mv[v].x,p.mv[v].y, p.refIdx[v]);
    }

    v++;
  }
}
#else
// tracing disabled: logmvcand() expands to nothing
#define logmvcand(p)
#endif
723 | | |
724 | | |
725 | | bool PBMotion::operator==(const PBMotion& b) const |
726 | 505k | { |
727 | 505k | const PBMotion& a = *this; |
728 | | |
729 | | // TODO: is this really correct? no check for predFlag? Standard says so... (p.127) |
730 | | |
731 | 1.18M | for (int i=0;i<2;i++) { |
732 | 871k | if (a.predFlag[i] != b.predFlag[i]) return false; |
733 | | |
734 | 788k | if (a.predFlag[i]) { |
735 | 615k | if (a.mv[i].x != b.mv[i].x) return false; |
736 | 546k | if (a.mv[i].y != b.mv[i].y) return false; |
737 | 524k | if (a.refIdx[i] != b.refIdx[i]) return false; |
738 | 524k | } |
739 | 788k | } |
740 | | |
741 | 312k | return true; |
742 | 505k | } |
743 | | |
744 | | |
745 | | class MotionVectorAccess_de265_image : public MotionVectorAccess |
746 | | { |
747 | | public: |
748 | 1.99M | MotionVectorAccess_de265_image(const de265_image* i) : img(i) { } |
749 | | |
750 | 1.99M | enum PartMode get_PartMode(int x,int y) const override { return img->get_PartMode(x,y); } |
751 | 1.58M | const PBMotion& get_mv_info(int x,int y) const override { return img->get_mv_info(x,y); } |
752 | | |
753 | | private: |
754 | | const de265_image* img; |
755 | | }; |
756 | | |
757 | | |
758 | | |
/*
  +--+                +--+--+
  |B2|                |B1|B0|
  +--+----------------+--+--+
     |                   |
     |                   |
     |                   |
     |                   |
     |        PB         |
     |                   |
     |                   |
  +--+                   |
  |A1|                   |
  +--+-------------------+
  |A0|
  +--+
*/
776 | | |
777 | | |
778 | | // 8.5.3.1.2 |
779 | | // TODO: check: can we fill the candidate list directly in this function and omit to copy later |
780 | | /* |
781 | | xC/yC: CB position |
782 | | nCS: CB size (probably modified because of singleMCLFlag) |
783 | | xP/yP: PB position (absolute) (probably modified because of singleMCLFlag) |
784 | | singleMCLFlag |
785 | | nPbW/nPbH: PB size |
786 | | partIdx |
787 | | out_cand: merging candidate vectors |
788 | | |
789 | | Add these candidates: |
790 | | - A1 |
791 | | - B1 (if != A1) |
792 | | - B0 (if != B1) |
793 | | - A0 (if != A1) |
794 | | - B2 (if != A1 and != B1) |
795 | | |
796 | | A maximum of 4 candidates are generated. |
797 | | |
798 | | Note 1: For a CB split into two PBs, it does not make sense to merge the |
799 | | second part to the parameters of the first part, since then, we could use 2Nx2N |
800 | | right away. -> Exclude this candidate. |
801 | | */ |
802 | | int derive_spatial_merging_candidates(//const de265_image* img, |
803 | | const MotionVectorAccess& mvaccess, |
804 | | const de265_image* img, |
805 | | int xC, int yC, int nCS, int xP, int yP, |
806 | | uint8_t singleMCLFlag, |
807 | | int nPbW, int nPbH, |
808 | | int partIdx, |
809 | | PBMotion* out_cand, |
810 | | int maxCandidates) |
811 | 1.99M | { |
812 | 1.99M | const pic_parameter_set* pps = &img->get_pps(); |
813 | 1.99M | const int log2_parallel_merge_level = pps->log2_parallel_merge_level; |
814 | | |
815 | 1.99M | enum PartMode PartMode = mvaccess.get_PartMode(xC,yC); |
816 | | |
817 | | /* |
818 | | const int A0 = SpatialMergingCandidates::PRED_A0; |
819 | | const int A1 = SpatialMergingCandidates::PRED_A1; |
820 | | const int B0 = SpatialMergingCandidates::PRED_B0; |
821 | | const int B1 = SpatialMergingCandidates::PRED_B1; |
822 | | const int B2 = SpatialMergingCandidates::PRED_B2; |
823 | | */ |
824 | | |
825 | | // --- A1 --- |
826 | | |
827 | | // a pixel within A1 (bottom right of A1) |
828 | 1.99M | int xA1 = xP-1; |
829 | 1.99M | int yA1 = yP+nPbH-1; |
830 | | |
831 | 1.99M | bool availableA1; |
832 | 1.99M | int idxA1; |
833 | | |
834 | 1.99M | int computed_candidates = 0; |
835 | | |
836 | | // check if candidate is in same motion-estimation region (MER) -> discard |
837 | 1.99M | if ((xP>>log2_parallel_merge_level) == (xA1>>log2_parallel_merge_level) && |
838 | 1.99M | (yP>>log2_parallel_merge_level) == (yA1>>log2_parallel_merge_level)) { |
839 | 113k | availableA1 = false; |
840 | 113k | logtrace(LogMotion,"spatial merging candidate A1: below parallel merge level\n"); |
841 | 113k | } |
842 | | // redundant candidate? (Note 1) -> discard |
843 | 1.88M | else if (// !singleMCLFlag && automatically true when partIdx==1 |
844 | 1.88M | partIdx==1 && |
845 | 1.88M | (PartMode==PART_Nx2N || |
846 | 192k | PartMode==PART_nLx2N || |
847 | 192k | PartMode==PART_nRx2N)) { |
848 | 80.4k | availableA1 = false; |
849 | 80.4k | logtrace(LogMotion,"spatial merging candidate A1: second part ignore\n"); |
850 | 80.4k | } |
851 | | // MV available in A1 |
852 | 1.80M | else { |
853 | 1.80M | availableA1 = img->available_pred_blk(xC,yC, nCS, xP,yP, nPbW,nPbH,partIdx, xA1,yA1); |
854 | 1.80M | if (!availableA1) logtrace(LogMotion,"spatial merging candidate A1: unavailable\n"); |
855 | 1.80M | } |
856 | | |
857 | 1.99M | if (availableA1) { |
858 | 1.58M | idxA1 = computed_candidates++; |
859 | 1.58M | out_cand[idxA1] = mvaccess.get_mv_info(xA1,yA1); |
860 | | |
861 | 1.58M | logtrace(LogMotion,"spatial merging candidate A1:\n"); |
862 | 1.58M | logmvcand(out_cand[idxA1]); |
863 | 1.58M | } |
864 | | |
865 | 1.99M | if (computed_candidates>=maxCandidates) return computed_candidates; |
866 | | |
867 | | |
868 | | // --- B1 --- |
869 | | |
870 | 651k | int xB1 = xP+nPbW-1; |
871 | 651k | int yB1 = yP-1; |
872 | | |
873 | 651k | bool availableB1; |
874 | 651k | int idxB1; |
875 | | |
876 | | // same MER -> discard |
877 | 651k | if ((xP>>log2_parallel_merge_level) == (xB1>>log2_parallel_merge_level) && |
878 | 651k | (yP>>log2_parallel_merge_level) == (yB1>>log2_parallel_merge_level)) { |
879 | 80.4k | availableB1 = false; |
880 | 80.4k | logtrace(LogMotion,"spatial merging candidate B1: below parallel merge level\n"); |
881 | 80.4k | } |
882 | | // redundant candidate (Note 1) -> discard |
883 | 570k | else if (// !singleMCLFlag && automatically true when partIdx==1 |
884 | 570k | partIdx==1 && |
885 | 570k | (PartMode==PART_2NxN || |
886 | 124k | PartMode==PART_2NxnU || |
887 | 124k | PartMode==PART_2NxnD)) { |
888 | 40.7k | availableB1 = false; |
889 | 40.7k | logtrace(LogMotion,"spatial merging candidate B1: second part ignore\n"); |
890 | 40.7k | } |
891 | | // MV available in B1 |
892 | 529k | else { |
893 | 529k | availableB1 = img->available_pred_blk(xC,yC, nCS, xP,yP, nPbW,nPbH,partIdx, xB1,yB1); |
894 | 529k | if (!availableB1) logtrace(LogMotion,"spatial merging candidate B1: unavailable\n"); |
895 | 529k | } |
896 | | |
897 | 651k | if (availableB1) { |
898 | 478k | const PBMotion& b1 = img->get_mv_info(xB1,yB1); |
899 | | |
900 | | // B1 == A1 -> discard B1 |
901 | 478k | if (availableA1 && out_cand[idxA1] == b1) { |
902 | 118k | idxB1 = idxA1; |
903 | 118k | logtrace(LogMotion,"spatial merging candidate B1: redundant to A1\n"); |
904 | 118k | } |
905 | 360k | else { |
906 | 360k | idxB1 = computed_candidates++; |
907 | 360k | out_cand[idxB1] = b1; |
908 | | |
909 | 360k | logtrace(LogMotion,"spatial merging candidate B1:\n"); |
910 | 360k | logmvcand(out_cand[idxB1]); |
911 | 360k | } |
912 | 478k | } |
913 | | |
914 | 651k | if (computed_candidates>=maxCandidates) return computed_candidates; |
915 | | |
916 | | |
917 | | // --- B0 --- |
918 | | |
919 | 338k | int xB0 = xP+nPbW; |
920 | 338k | int yB0 = yP-1; |
921 | | |
922 | 338k | bool availableB0; |
923 | 338k | int idxB0; |
924 | | |
925 | 338k | if ((xP>>log2_parallel_merge_level) == (xB0>>log2_parallel_merge_level) && |
926 | 338k | (yP>>log2_parallel_merge_level) == (yB0>>log2_parallel_merge_level)) { |
927 | 50.2k | availableB0 = false; |
928 | 50.2k | logtrace(LogMotion,"spatial merging candidate B0: below parallel merge level\n"); |
929 | 50.2k | } |
930 | 288k | else { |
931 | 288k | availableB0 = img->available_pred_blk(xC,yC, nCS, xP,yP, nPbW,nPbH,partIdx, xB0,yB0); |
932 | 288k | if (!availableB0) logtrace(LogMotion,"spatial merging candidate B0: unavailable\n"); |
933 | 288k | } |
934 | | |
935 | 338k | if (availableB0) { |
936 | 116k | const PBMotion& b0 = img->get_mv_info(xB0,yB0); |
937 | | |
938 | | // B0 == B1 -> discard B0 |
939 | 116k | if (availableB1 && out_cand[idxB1]==b0) { |
940 | 68.7k | idxB0 = idxB1; |
941 | 68.7k | logtrace(LogMotion,"spatial merging candidate B0: redundant to B1\n"); |
942 | 68.7k | } |
943 | 48.2k | else { |
944 | 48.2k | idxB0 = computed_candidates++; |
945 | 48.2k | out_cand[idxB0] = b0; |
946 | 48.2k | logtrace(LogMotion,"spatial merging candidate B0:\n"); |
947 | 48.2k | logmvcand(out_cand[idxB0]); |
948 | 48.2k | } |
949 | 116k | } |
950 | | |
951 | 338k | if (computed_candidates>=maxCandidates) return computed_candidates; |
952 | | |
953 | | |
954 | | // --- A0 --- |
955 | | |
956 | 298k | int xA0 = xP-1; |
957 | 298k | int yA0 = yP+nPbH; |
958 | | |
959 | 298k | bool availableA0; |
960 | 298k | int idxA0; |
961 | | |
962 | 298k | if ((xP>>log2_parallel_merge_level) == (xA0>>log2_parallel_merge_level) && |
963 | 298k | (yP>>log2_parallel_merge_level) == (yA0>>log2_parallel_merge_level)) { |
964 | 67.6k | availableA0 = false; |
965 | 67.6k | logtrace(LogMotion,"spatial merging candidate A0: below parallel merge level\n"); |
966 | 67.6k | } |
967 | 230k | else { |
968 | 230k | availableA0 = img->available_pred_blk(xC,yC, nCS, xP,yP, nPbW,nPbH,partIdx, xA0,yA0); |
969 | 230k | if (!availableA0) logtrace(LogMotion,"spatial merging candidate A0: unavailable\n"); |
970 | 230k | } |
971 | | |
972 | 298k | if (availableA0) { |
973 | 43.1k | const PBMotion& a0 = img->get_mv_info(xA0,yA0); |
974 | | |
975 | | // A0 == A1 -> discard A0 |
976 | 43.1k | if (availableA1 && out_cand[idxA1]==a0) { |
977 | 30.7k | idxA0 = idxA1; |
978 | 30.7k | logtrace(LogMotion,"spatial merging candidate A0: redundant to A1\n"); |
979 | 30.7k | } |
980 | 12.3k | else { |
981 | 12.3k | idxA0 = computed_candidates++; |
982 | 12.3k | out_cand[idxA0] = a0; |
983 | 12.3k | logtrace(LogMotion,"spatial merging candidate A0:\n"); |
984 | 12.3k | logmvcand(out_cand[idxA0]); |
985 | 12.3k | } |
986 | 43.1k | } |
987 | | |
988 | 298k | if (computed_candidates>=maxCandidates) return computed_candidates; |
989 | | |
990 | | |
991 | | // --- B2 --- |
992 | | |
993 | 288k | int xB2 = xP-1; |
994 | 288k | int yB2 = yP-1; |
995 | | |
996 | 288k | bool availableB2; |
997 | 288k | int idxB2; |
998 | | |
999 | | // if we already have four candidates, do not consider B2 anymore |
1000 | 288k | if (computed_candidates==4) { |
1001 | 240 | availableB2 = false; |
1002 | 240 | logtrace(LogMotion,"spatial merging candidate B2: ignore\n"); |
1003 | 240 | } |
1004 | 288k | else if ((xP>>log2_parallel_merge_level) == (xB2>>log2_parallel_merge_level) && |
1005 | 288k | (yP>>log2_parallel_merge_level) == (yB2>>log2_parallel_merge_level)) { |
1006 | 77.5k | availableB2 = false; |
1007 | 77.5k | logtrace(LogMotion,"spatial merging candidate B2: below parallel merge level\n"); |
1008 | 77.5k | } |
1009 | 211k | else { |
1010 | 211k | availableB2 = img->available_pred_blk(xC,yC, nCS, xP,yP, nPbW,nPbH,partIdx, xB2,yB2); |
1011 | 211k | if (!availableB2) logtrace(LogMotion,"spatial merging candidate B2: unavailable\n"); |
1012 | 211k | } |
1013 | | |
1014 | 288k | if (availableB2) { |
1015 | 125k | const PBMotion& b2 = img->get_mv_info(xB2,yB2); |
1016 | | |
1017 | | // B2 == B1 -> discard B2 |
1018 | 125k | if (availableB1 && out_cand[idxB1]==b2) { |
1019 | 82.8k | idxB2 = idxB1; |
1020 | 82.8k | logtrace(LogMotion,"spatial merging candidate B2: redundant to B1\n"); |
1021 | 82.8k | } |
1022 | | // B2 == A1 -> discard B2 |
1023 | 42.8k | else if (availableA1 && out_cand[idxA1]==b2) { |
1024 | 11.3k | idxB2 = idxA1; |
1025 | 11.3k | logtrace(LogMotion,"spatial merging candidate B2: redundant to A1\n"); |
1026 | 11.3k | } |
1027 | 31.4k | else { |
1028 | 31.4k | idxB2 = computed_candidates++; |
1029 | 31.4k | out_cand[idxB2] = b2; |
1030 | 31.4k | logtrace(LogMotion,"spatial merging candidate B2:\n"); |
1031 | 31.4k | logmvcand(out_cand[idxB2]); |
1032 | 31.4k | } |
1033 | 125k | } |
1034 | | |
1035 | 288k | return computed_candidates; |
1036 | 298k | } |
1037 | | |
1038 | | |
1039 | | // 8.5.3.1.4 |
1040 | | void derive_zero_motion_vector_candidates(const slice_segment_header* shdr, |
1041 | | PBMotion* out_mergeCandList, |
1042 | | int* inout_numCurrMergeCand, |
1043 | | int maxCandidates) |
1044 | 1.99M | { |
1045 | 1.99M | logtrace(LogMotion,"derive_zero_motion_vector_candidates\n"); |
1046 | | |
1047 | 1.99M | int numRefIdx; |
1048 | | |
1049 | 1.99M | if (shdr->slice_type==SLICE_TYPE_P) { |
1050 | 272k | numRefIdx = shdr->num_ref_idx_l0_active; |
1051 | 272k | } |
1052 | 1.72M | else { |
1053 | 1.72M | numRefIdx = libde265_min(shdr->num_ref_idx_l0_active, |
1054 | 1.72M | shdr->num_ref_idx_l1_active); |
1055 | 1.72M | } |
1056 | | |
1057 | | |
1058 | | //int numInputMergeCand = *inout_numMergeCand; |
1059 | 1.99M | int zeroIdx = 0; |
1060 | | |
1061 | 2.29M | while (*inout_numCurrMergeCand < maxCandidates) { |
1062 | | // 1. |
1063 | | |
1064 | 301k | logtrace(LogMotion,"zeroIdx:%d numRefIdx:%d\n", zeroIdx, numRefIdx); |
1065 | | |
1066 | 301k | PBMotion* newCand = &out_mergeCandList[*inout_numCurrMergeCand]; |
1067 | | |
1068 | 301k | const int refIdx = (zeroIdx < numRefIdx) ? zeroIdx : 0; |
1069 | | |
1070 | 301k | if (shdr->slice_type==SLICE_TYPE_P) { |
1071 | 35.9k | newCand->refIdx[0] = refIdx; |
1072 | 35.9k | newCand->refIdx[1] = -1; |
1073 | 35.9k | newCand->predFlag[0] = 1; |
1074 | 35.9k | newCand->predFlag[1] = 0; |
1075 | 35.9k | } |
1076 | 265k | else { |
1077 | 265k | newCand->refIdx[0] = refIdx; |
1078 | 265k | newCand->refIdx[1] = refIdx; |
1079 | 265k | newCand->predFlag[0] = 1; |
1080 | 265k | newCand->predFlag[1] = 1; |
1081 | 265k | } |
1082 | | |
1083 | 301k | newCand->mv[0].x = 0; |
1084 | 301k | newCand->mv[0].y = 0; |
1085 | 301k | newCand->mv[1].x = 0; |
1086 | 301k | newCand->mv[1].y = 0; |
1087 | | |
1088 | 301k | (*inout_numCurrMergeCand)++; |
1089 | | |
1090 | | // 2. |
1091 | | |
1092 | 301k | zeroIdx++; |
1093 | 301k | } |
1094 | 1.99M | } |
1095 | | |
1096 | | |
1097 | | bool scale_mv(MotionVector* out_mv, MotionVector mv, int colDist, int currDist) |
1098 | 20.0k | { |
1099 | 20.0k | int td = Clip3(-128,127, colDist); |
1100 | 20.0k | int tb = Clip3(-128,127, currDist); |
1101 | | |
1102 | 20.0k | if (td==0) { |
1103 | 2.64k | *out_mv = mv; |
1104 | 2.64k | return false; |
1105 | 2.64k | } |
1106 | 17.4k | else { |
1107 | 17.4k | int tx = (16384 + (abs_value(td)>>1)) / td; |
1108 | 17.4k | int distScaleFactor = Clip3(-4096,4095, (tb*tx+32)>>6); |
1109 | 17.4k | out_mv->x = Clip3(-32768,32767, |
1110 | 17.4k | Sign(distScaleFactor*mv.x)*((abs_value(distScaleFactor*mv.x)+127)>>8)); |
1111 | 17.4k | out_mv->y = Clip3(-32768,32767, |
1112 | 17.4k | Sign(distScaleFactor*mv.y)*((abs_value(distScaleFactor*mv.y)+127)>>8)); |
1113 | 17.4k | return true; |
1114 | 17.4k | } |
1115 | 20.0k | } |
1116 | | |
1117 | | |
1118 | | // (L1003) 8.5.3.2.8 |
1119 | | |
1120 | | void derive_collocated_motion_vectors(base_context* ctx, |
1121 | | de265_image* img, |
1122 | | const slice_segment_header* shdr, |
1123 | | int xP,int yP, |
1124 | | int colPic, |
1125 | | int xColPb,int yColPb, |
1126 | | int refIdxLX, // (always 0 for merge mode) |
1127 | | int X, |
1128 | | MotionVector* out_mvLXCol, |
1129 | | uint8_t* out_availableFlagLXCol) |
1130 | 887k | { |
1131 | 887k | logtrace(LogMotion,"derive_collocated_motion_vectors %d;%d\n",xP,yP); |
1132 | | |
1133 | | |
1134 | | // get collocated image and the prediction mode at the collocated position |
1135 | | |
1136 | 887k | assert(ctx->has_image(colPic)); |
1137 | 887k | const de265_image* colImg = ctx->get_image(colPic); |
1138 | | |
1139 | | // check for access outside image area |
1140 | | |
1141 | 887k | if (xColPb >= colImg->get_width() || |
1142 | 887k | yColPb >= colImg->get_height()) { |
1143 | 0 | ctx->add_warning(DE265_WARNING_COLLOCATED_MOTION_VECTOR_OUTSIDE_IMAGE_AREA, false); |
1144 | 0 | *out_availableFlagLXCol = 0; |
1145 | 0 | return; |
1146 | 0 | } |
1147 | | |
1148 | 887k | enum PredMode predMode = colImg->get_pred_mode(xColPb,yColPb); |
1149 | | |
1150 | | |
1151 | | // collocated block is Intra -> no collocated MV |
1152 | | |
1153 | 887k | if (predMode == MODE_INTRA) { |
1154 | 834k | out_mvLXCol->x = 0; |
1155 | 834k | out_mvLXCol->y = 0; |
1156 | 834k | *out_availableFlagLXCol = 0; |
1157 | 834k | return; |
1158 | 834k | } |
1159 | | |
1160 | | |
1161 | 53.2k | logtrace(LogMotion,"colPic:%d (POC=%d) X:%d refIdxLX:%d refpiclist:%d\n", |
1162 | 53.2k | colPic, |
1163 | 53.2k | colImg->PicOrderCntVal, |
1164 | 53.2k | X,refIdxLX,shdr->RefPicList[X][refIdxLX]); |
1165 | | |
1166 | | |
1167 | | // collocated reference image is unavailable -> no collocated MV |
1168 | | |
1169 | 53.2k | if (colImg->integrity == INTEGRITY_UNAVAILABLE_REFERENCE) { |
1170 | 0 | out_mvLXCol->x = 0; |
1171 | 0 | out_mvLXCol->y = 0; |
1172 | 0 | *out_availableFlagLXCol = 0; |
1173 | 0 | return; |
1174 | 0 | } |
1175 | | |
1176 | | |
1177 | | // get the collocated MV |
1178 | | |
1179 | 53.2k | const PBMotion& mvi = colImg->get_mv_info(xColPb,yColPb); |
1180 | 53.2k | int listCol; |
1181 | 53.2k | int refIdxCol; |
1182 | 53.2k | MotionVector mvCol; |
1183 | | |
1184 | 53.2k | logtrace(LogMotion,"read MVI %d;%d:\n",xColPb,yColPb); |
1185 | 53.2k | logmvcand(mvi); |
1186 | | |
1187 | | |
1188 | | // collocated MV uses only L1 -> use L1 |
1189 | 53.2k | if (mvi.predFlag[0]==0) { |
1190 | 15.1k | mvCol = mvi.mv[1]; |
1191 | 15.1k | refIdxCol = mvi.refIdx[1]; |
1192 | 15.1k | listCol = 1; |
1193 | 15.1k | } |
1194 | | // collocated MV uses only L0 -> use L0 |
1195 | 38.0k | else if (mvi.predFlag[1]==0) { |
1196 | 16.5k | mvCol = mvi.mv[0]; |
1197 | 16.5k | refIdxCol = mvi.refIdx[0]; |
1198 | 16.5k | listCol = 0; |
1199 | 16.5k | } |
1200 | | // collocated MV uses L0 and L1 |
1201 | 21.4k | else { |
1202 | 21.4k | bool allRefFramesBeforeCurrentFrame = true; |
1203 | | |
1204 | 21.4k | const int currentPOC = img->PicOrderCntVal; |
1205 | | |
1206 | | // all reference POCs earlier than current POC (list 1) |
1207 | | // Test L1 first, because there is a higher change to find a future reference frame. |
1208 | | |
1209 | 50.6k | for (int rIdx=0; rIdx<shdr->num_ref_idx_l1_active && allRefFramesBeforeCurrentFrame; rIdx++) |
1210 | 29.1k | { |
1211 | 29.1k | const de265_image* refimg = ctx->get_image(shdr->RefPicList[1][rIdx]); |
1212 | 29.1k | int refPOC = refimg->PicOrderCntVal; |
1213 | | |
1214 | 29.1k | if (refPOC > currentPOC) { |
1215 | 5.32k | allRefFramesBeforeCurrentFrame = false; |
1216 | 5.32k | } |
1217 | 29.1k | } |
1218 | | |
1219 | | // all reference POCs earlier than current POC (list 0) |
1220 | | |
1221 | 48.3k | for (int rIdx=0; rIdx<shdr->num_ref_idx_l0_active && allRefFramesBeforeCurrentFrame; rIdx++) |
1222 | 26.8k | { |
1223 | 26.8k | const de265_image* refimg = ctx->get_image(shdr->RefPicList[0][rIdx]); |
1224 | 26.8k | int refPOC = refimg->PicOrderCntVal; |
1225 | | |
1226 | 26.8k | if (refPOC > currentPOC) { |
1227 | 3.22k | allRefFramesBeforeCurrentFrame = false; |
1228 | 3.22k | } |
1229 | 26.8k | } |
1230 | | |
1231 | | |
1232 | | /* TODO: What is the rationale behind this ??? |
1233 | | |
1234 | | My guess: |
1235 | | when there are images before the current frame (most probably in L0) and images after |
1236 | | the current frame (most probably in L1), we take the reference in the opposite |
1237 | | direction than where the collocated frame is positioned in the hope that the distance |
1238 | | to the current frame will be smaller and thus give a better prediction. |
1239 | | |
1240 | | If all references point into the past, we cannot say much about the temporal order or |
1241 | | L0,L1 and thus take over both parts. |
1242 | | */ |
1243 | | |
1244 | 21.4k | if (allRefFramesBeforeCurrentFrame) { |
1245 | 12.9k | mvCol = mvi.mv[X]; |
1246 | 12.9k | refIdxCol = mvi.refIdx[X]; |
1247 | 12.9k | listCol = X; |
1248 | 12.9k | } |
1249 | 8.54k | else { |
1250 | 8.54k | int N = shdr->collocated_from_l0_flag; |
1251 | 8.54k | mvCol = mvi.mv[N]; |
1252 | 8.54k | refIdxCol = mvi.refIdx[N]; |
1253 | 8.54k | listCol = N; |
1254 | 8.54k | } |
1255 | 21.4k | } |
1256 | | |
1257 | | |
1258 | | |
1259 | 53.2k | int slice_hdr_idx = colImg->get_SliceHeaderIndex(xColPb,yColPb); |
1260 | 53.2k | if (slice_hdr_idx >= colImg->slices.size()) { |
1261 | 0 | ctx->add_warning(DE265_WARNING_INVALID_SLICE_HEADER_INDEX_ACCESS, false); |
1262 | |
|
1263 | 0 | *out_availableFlagLXCol = 0; |
1264 | 0 | out_mvLXCol->x = 0; |
1265 | 0 | out_mvLXCol->y = 0; |
1266 | 0 | return; |
1267 | 0 | } |
1268 | | |
1269 | 53.2k | const slice_segment_header* colShdr = colImg->slices[ colImg->get_SliceHeaderIndex(xColPb,yColPb) ]; |
1270 | | |
1271 | 53.2k | if (shdr->LongTermRefPic[X][refIdxLX] != |
1272 | 53.2k | colShdr->LongTermRefPic[listCol][refIdxCol]) { |
1273 | 8.85k | *out_availableFlagLXCol = 0; |
1274 | 8.85k | out_mvLXCol->x = 0; |
1275 | 8.85k | out_mvLXCol->y = 0; |
1276 | 8.85k | } |
1277 | 44.3k | else { |
1278 | 44.3k | *out_availableFlagLXCol = 1; |
1279 | | |
1280 | 44.3k | const bool isLongTerm = shdr->LongTermRefPic[X][refIdxLX]; |
1281 | | |
1282 | 44.3k | int colDist = colImg->PicOrderCntVal - colShdr->RefPicList_POC[listCol][refIdxCol]; |
1283 | 44.3k | int currDist = img->PicOrderCntVal - shdr->RefPicList_POC[X][refIdxLX]; |
1284 | | |
1285 | 44.3k | logtrace(LogMotion,"COLPOCDIFF %d %d [%d %d / %d %d]\n",colDist, currDist, |
1286 | 44.3k | colImg->PicOrderCntVal, colShdr->RefPicList_POC[listCol][refIdxCol], |
1287 | 44.3k | img->PicOrderCntVal, shdr->RefPicList_POC[X][refIdxLX] |
1288 | 44.3k | ); |
1289 | | |
1290 | 44.3k | if (isLongTerm || colDist == currDist) { |
1291 | 37.8k | *out_mvLXCol = mvCol; |
1292 | 37.8k | } |
1293 | 6.48k | else { |
1294 | 6.48k | if (!scale_mv(out_mvLXCol, mvCol, colDist, currDist)) { |
1295 | 425 | ctx->add_warning(DE265_WARNING_INCORRECT_MOTION_VECTOR_SCALING, false); |
1296 | 425 | img->integrity = INTEGRITY_DECODING_ERRORS; |
1297 | 425 | } |
1298 | | |
1299 | 6.48k | logtrace(LogMotion,"scale: %d;%d to %d;%d\n", |
1300 | 6.48k | mvCol.x,mvCol.y, out_mvLXCol->x,out_mvLXCol->y); |
1301 | 6.48k | } |
1302 | 44.3k | } |
1303 | 53.2k | } |
1304 | | |
1305 | | |
1306 | | // 8.5.3.1.7 |
1307 | | void derive_temporal_luma_vector_prediction(base_context* ctx, |
1308 | | de265_image* img, |
1309 | | const slice_segment_header* shdr, |
1310 | | int xP,int yP, |
1311 | | int nPbW,int nPbH, |
1312 | | int refIdxL, |
1313 | | int X, // which MV (L0/L1) to get |
1314 | | MotionVector* out_mvLXCol, |
1315 | | uint8_t* out_availableFlagLXCol) |
1316 | 1.17M | { |
1317 | | // --- no temporal MVP -> exit --- |
1318 | | |
1319 | 1.17M | if (shdr->slice_temporal_mvp_enabled_flag == 0) { |
1320 | 537k | out_mvLXCol->x = 0; |
1321 | 537k | out_mvLXCol->y = 0; |
1322 | 537k | *out_availableFlagLXCol = 0; |
1323 | 537k | return; |
1324 | 537k | } |
1325 | | |
1326 | | |
1327 | | // --- find collocated reference image --- |
1328 | | |
1329 | 632k | int Log2CtbSizeY = img->get_sps().Log2CtbSizeY; |
1330 | | |
1331 | 632k | int colPic; // TODO: this is the same for the whole slice. We can precompute it. |
1332 | | |
1333 | 632k | if (shdr->slice_type == SLICE_TYPE_B && |
1334 | 632k | shdr->collocated_from_l0_flag == 0) |
1335 | 120k | { |
1336 | 120k | logtrace(LogMotion,"collocated L1 ref_idx=%d\n",shdr->collocated_ref_idx); |
1337 | | |
1338 | 120k | colPic = shdr->RefPicList[1][ shdr->collocated_ref_idx ]; |
1339 | 120k | } |
1340 | 512k | else |
1341 | 512k | { |
1342 | 512k | logtrace(LogMotion,"collocated L0 ref_idx=%d\n",shdr->collocated_ref_idx); |
1343 | | |
1344 | 512k | colPic = shdr->RefPicList[0][ shdr->collocated_ref_idx ]; |
1345 | 512k | } |
1346 | | |
1347 | | |
1348 | | // check whether collocated reference picture exists |
1349 | | |
1350 | 632k | if (!ctx->has_image(colPic)) { |
1351 | 0 | out_mvLXCol->x = 0; |
1352 | 0 | out_mvLXCol->y = 0; |
1353 | 0 | *out_availableFlagLXCol = 0; |
1354 | |
|
1355 | 0 | ctx->add_warning(DE265_WARNING_NONEXISTING_REFERENCE_PICTURE_ACCESSED, false); |
1356 | 0 | return; |
1357 | 0 | } |
1358 | | |
1359 | | |
1360 | | // --- get collocated MV either at bottom-right corner or from center of PB --- |
1361 | | |
1362 | 632k | int xColPb,yColPb; |
1363 | 632k | int yColBr = yP + nPbH; // bottom right collocated motion vector position |
1364 | 632k | int xColBr = xP + nPbW; |
1365 | | |
1366 | | /* If neighboring pixel at bottom-right corner is in the same CTB-row and inside the image, |
1367 | | use this (reduced down to 16 pixels resolution) as collocated MV position. |
1368 | | |
1369 | | Note: see 2014, Sze, Sect. 5.2.1.2 why candidate C0 is excluded when on another CTB-row. |
1370 | | This is to reduce the memory bandwidth requirements. |
1371 | | */ |
1372 | 632k | if ((yP>>Log2CtbSizeY) == (yColBr>>Log2CtbSizeY) && |
1373 | 632k | xColBr < img->get_sps().pic_width_in_luma_samples && |
1374 | 632k | yColBr < img->get_sps().pic_height_in_luma_samples) |
1375 | 260k | { |
1376 | 260k | xColPb = xColBr & ~0x0F; // reduce resolution of collocated motion-vectors to 16 pixels grid |
1377 | 260k | yColPb = yColBr & ~0x0F; |
1378 | | |
1379 | 260k | derive_collocated_motion_vectors(ctx,img,shdr, xP,yP, colPic, xColPb,yColPb, refIdxL, X, |
1380 | 260k | out_mvLXCol, out_availableFlagLXCol); |
1381 | 260k | } |
1382 | 371k | else |
1383 | 371k | { |
1384 | 371k | out_mvLXCol->x = 0; |
1385 | 371k | out_mvLXCol->y = 0; |
1386 | 371k | *out_availableFlagLXCol = 0; |
1387 | 371k | } |
1388 | | |
1389 | | |
1390 | 632k | if (*out_availableFlagLXCol==0) { |
1391 | | |
1392 | 626k | int xColCtr = xP+(nPbW>>1); |
1393 | 626k | int yColCtr = yP+(nPbH>>1); |
1394 | | |
1395 | 626k | xColPb = xColCtr & ~0x0F; // reduce resolution of collocated motion-vectors to 16 pixels grid |
1396 | 626k | yColPb = yColCtr & ~0x0F; |
1397 | | |
1398 | 626k | derive_collocated_motion_vectors(ctx,img,shdr, xP,yP, colPic, xColPb,yColPb, refIdxL, X, |
1399 | 626k | out_mvLXCol, out_availableFlagLXCol); |
1400 | 626k | } |
1401 | 632k | } |
1402 | | |
1403 | | |
1404 | | static int table_8_19[2][12] = { |
1405 | | { 0,1,0,2,1,2,0,3,1,3,2,3 }, |
1406 | | { 1,0,2,0,2,1,3,0,3,1,3,2 } |
1407 | | }; |
1408 | | |
1409 | | // 8.5.3.1.3 |
1410 | | /* Note (TODO): during decoding, we know which of the candidates we will select. |
1411 | | + Hence, we do not really have to generate the other ones... |
1412 | | + */ |
1413 | | void derive_combined_bipredictive_merging_candidates(const base_context* ctx, |
1414 | | const slice_segment_header* shdr, |
1415 | | PBMotion* inout_mergeCandList, |
1416 | | int* inout_numMergeCand, |
1417 | | int maxCandidates) |
1418 | 1.72M | { |
1419 | 1.72M | if (*inout_numMergeCand>1 && *inout_numMergeCand < maxCandidates) { |
1420 | 19.3k | int numOrigMergeCand = *inout_numMergeCand; |
1421 | | |
1422 | 19.3k | int numInputMergeCand = *inout_numMergeCand; |
1423 | 19.3k | int combIdx = 0; |
1424 | 19.3k | uint8_t combStop = false; |
1425 | | |
1426 | 63.5k | while (!combStop) { |
1427 | 44.2k | int l0CandIdx = table_8_19[0][combIdx]; |
1428 | 44.2k | int l1CandIdx = table_8_19[1][combIdx]; |
1429 | | |
1430 | 44.2k | if (l0CandIdx >= numInputMergeCand || |
1431 | 44.2k | l1CandIdx >= numInputMergeCand) { |
1432 | 0 | assert(false); // bitstream error -> TODO: conceal error |
1433 | 0 | } |
1434 | | |
1435 | 44.2k | PBMotion& l0Cand = inout_mergeCandList[l0CandIdx]; |
1436 | 44.2k | PBMotion& l1Cand = inout_mergeCandList[l1CandIdx]; |
1437 | | |
1438 | 44.2k | logtrace(LogMotion,"add bipredictive merging candidate (combIdx:%d)\n",combIdx); |
1439 | 44.2k | logtrace(LogMotion,"l0Cand:\n"); logmvcand(l0Cand); |
1440 | 44.2k | logtrace(LogMotion,"l1Cand:\n"); logmvcand(l1Cand); |
1441 | | |
1442 | 44.2k | const de265_image* img0 = l0Cand.predFlag[0] ? ctx->get_image(shdr->RefPicList[0][l0Cand.refIdx[0]]) : NULL; |
1443 | 44.2k | const de265_image* img1 = l1Cand.predFlag[1] ? ctx->get_image(shdr->RefPicList[1][l1Cand.refIdx[1]]) : NULL; |
1444 | | |
1445 | 44.2k | if (l0Cand.predFlag[0] && !img0) { |
1446 | 0 | return; // TODO error |
1447 | 0 | } |
1448 | | |
1449 | 44.2k | if (l1Cand.predFlag[1] && !img1) { |
1450 | 0 | return; // TODO error |
1451 | 0 | } |
1452 | | |
1453 | 44.2k | if (l0Cand.predFlag[0] && l1Cand.predFlag[1] && |
1454 | 44.2k | (img0->PicOrderCntVal != img1->PicOrderCntVal || |
1455 | 23.5k | l0Cand.mv[0].x != l1Cand.mv[1].x || |
1456 | 23.5k | l0Cand.mv[0].y != l1Cand.mv[1].y)) { |
1457 | 12.2k | PBMotion& p = inout_mergeCandList[ *inout_numMergeCand ]; |
1458 | 12.2k | p.refIdx[0] = l0Cand.refIdx[0]; |
1459 | 12.2k | p.refIdx[1] = l1Cand.refIdx[1]; |
1460 | 12.2k | p.predFlag[0] = l0Cand.predFlag[0]; |
1461 | 12.2k | p.predFlag[1] = l1Cand.predFlag[1]; |
1462 | 12.2k | p.mv[0] = l0Cand.mv[0]; |
1463 | 12.2k | p.mv[1] = l1Cand.mv[1]; |
1464 | 12.2k | (*inout_numMergeCand)++; |
1465 | | |
1466 | 12.2k | logtrace(LogMotion,"result:\n"); |
1467 | 12.2k | logmvcand(p); |
1468 | 12.2k | } |
1469 | | |
1470 | 44.2k | combIdx++; |
1471 | 44.2k | if (combIdx == numOrigMergeCand*(numOrigMergeCand-1) || |
1472 | 44.2k | *inout_numMergeCand == maxCandidates) { |
1473 | 19.3k | combStop = true; |
1474 | 19.3k | } |
1475 | 44.2k | } |
1476 | 19.3k | } |
1477 | 1.72M | } |
1478 | | |
1479 | | |
1480 | | // 8.5.3.1.1 |
1481 | | |
1482 | | void get_merge_candidate_list_without_step_9(base_context* ctx, |
1483 | | const slice_segment_header* shdr, |
1484 | | const MotionVectorAccess& mvaccess, |
1485 | | de265_image* img, |
1486 | | int xC,int yC, int xP,int yP, |
1487 | | int nCS, int nPbW,int nPbH, int partIdx, |
1488 | | int max_merge_idx, |
1489 | | PBMotion* mergeCandList) |
1490 | 1.99M | { |
1491 | | |
1492 | | //int xOrigP = xP; |
1493 | | //int yOrigP = yP; |
1494 | 1.99M | int nOrigPbW = nPbW; |
1495 | 1.99M | int nOrigPbH = nPbH; |
1496 | | |
1497 | 1.99M | int singleMCLFlag; // single merge-candidate-list (MCL) flag |
1498 | | |
1499 | | /* Use single MCL for CBs of size 8x8, except when parallel-merge-level is at 4x4. |
1500 | | Without this flag, PBs smaller than 8x8 would not receive as much merging candidates. |
1501 | | Having additional candidates might have these advantages: |
1502 | | - coding MVs for these small PBs is expensive, and |
1503 | | - since the PBs are not far away from a proper (neighboring) merging candidate, |
1504 | | the quality of the candidates will still be good. |
1505 | | */ |
1506 | 1.99M | singleMCLFlag = (img->get_pps().log2_parallel_merge_level > 2 && nCS==8); |
1507 | | |
1508 | 1.99M | if (singleMCLFlag) { |
1509 | 773k | xP=xC; |
1510 | 773k | yP=yC; |
1511 | 773k | nPbW=nCS; |
1512 | 773k | nPbH=nCS; |
1513 | 773k | partIdx=0; |
1514 | 773k | } |
1515 | | |
1516 | 1.99M | int maxCandidates = max_merge_idx+1; |
1517 | | //MotionVectorSpec mergeCandList[5]; |
1518 | 1.99M | int numMergeCand=0; |
1519 | | |
1520 | | // --- spatial merge candidates |
1521 | | |
1522 | 1.99M | numMergeCand = derive_spatial_merging_candidates(mvaccess, |
1523 | 1.99M | img, xC,yC, nCS, xP,yP, singleMCLFlag, |
1524 | 1.99M | nPbW,nPbH,partIdx, mergeCandList, |
1525 | 1.99M | maxCandidates); |
1526 | | |
1527 | | // --- collocated merge candidate |
1528 | 1.99M | if (numMergeCand < maxCandidates) { |
1529 | 263k | int refIdxCol[2] = { 0,0 }; |
1530 | | |
1531 | 263k | MotionVector mvCol[2]; |
1532 | 263k | uint8_t predFlagLCol[2]; |
1533 | 263k | derive_temporal_luma_vector_prediction(ctx,img,shdr, xP,yP,nPbW,nPbH, |
1534 | 263k | refIdxCol[0],0, &mvCol[0], |
1535 | 263k | &predFlagLCol[0]); |
1536 | | |
1537 | 263k | uint8_t availableFlagCol = predFlagLCol[0]; |
1538 | 263k | predFlagLCol[1] = 0; |
1539 | | |
1540 | 263k | if (shdr->slice_type == SLICE_TYPE_B) { |
1541 | 233k | derive_temporal_luma_vector_prediction(ctx,img,shdr, |
1542 | 233k | xP,yP,nPbW,nPbH, refIdxCol[1],1, &mvCol[1], |
1543 | 233k | &predFlagLCol[1]); |
1544 | 233k | availableFlagCol |= predFlagLCol[1]; |
1545 | 233k | } |
1546 | | |
1547 | | |
1548 | 263k | if (availableFlagCol) { |
1549 | 7.56k | PBMotion* colVec = &mergeCandList[numMergeCand++]; |
1550 | | |
1551 | 7.56k | colVec->mv[0] = mvCol[0]; |
1552 | 7.56k | colVec->mv[1] = mvCol[1]; |
1553 | 7.56k | colVec->predFlag[0] = predFlagLCol[0]; |
1554 | 7.56k | colVec->predFlag[1] = predFlagLCol[1]; |
1555 | 7.56k | colVec->refIdx[0] = refIdxCol[0]; |
1556 | 7.56k | colVec->refIdx[1] = refIdxCol[1]; |
1557 | 7.56k | } |
1558 | 263k | } |
1559 | | |
1560 | | |
1561 | | // --- bipredictive merge candidates --- |
1562 | | |
1563 | 1.99M | if (shdr->slice_type == SLICE_TYPE_B) { |
1564 | 1.72M | derive_combined_bipredictive_merging_candidates(ctx, shdr, |
1565 | 1.72M | mergeCandList, &numMergeCand, maxCandidates); |
1566 | 1.72M | } |
1567 | | |
1568 | | |
1569 | | // --- zero-vector merge candidates --- |
1570 | | |
1571 | 1.99M | derive_zero_motion_vector_candidates(shdr, mergeCandList, &numMergeCand, maxCandidates); |
1572 | | |
1573 | | |
1574 | 1.99M | logtrace(LogMotion,"mergeCandList:\n"); |
1575 | 9.37M | for (int i=0;i<shdr->MaxNumMergeCand;i++) |
1576 | 7.38M | { |
1577 | | //logtrace(LogMotion, " %d:%s\n", i, i==merge_idx ? " SELECTED":""); |
1578 | 7.38M | logmvcand(mergeCandList[i]); |
1579 | 7.38M | } |
1580 | 1.99M | } |
1581 | | |
1582 | | |
1583 | | |
1584 | | void get_merge_candidate_list(base_context* ctx, |
1585 | | const slice_segment_header* shdr, |
1586 | | de265_image* img, |
1587 | | int xC,int yC, int xP,int yP, |
1588 | | int nCS, int nPbW,int nPbH, int partIdx, |
1589 | | PBMotion* mergeCandList) |
1590 | 0 | { |
1591 | 0 | int max_merge_idx = 5-shdr->five_minus_max_num_merge_cand -1; |
1592 | |
|
1593 | 0 | get_merge_candidate_list_without_step_9(ctx, shdr, |
1594 | 0 | MotionVectorAccess_de265_image(img), img, |
1595 | 0 | xC,yC,xP,yP,nCS,nPbW,nPbH, partIdx, |
1596 | 0 | max_merge_idx, mergeCandList); |
1597 | | |
1598 | | // 9. for encoder: modify all merge candidates |
1599 | |
|
1600 | 0 | for (int i=0;i<=max_merge_idx;i++) { |
1601 | 0 | if (mergeCandList[i].predFlag[0] && |
1602 | 0 | mergeCandList[i].predFlag[1] && |
1603 | 0 | nPbW+nPbH==12) |
1604 | 0 | { |
1605 | 0 | mergeCandList[i].refIdx[1] = -1; |
1606 | 0 | mergeCandList[i].predFlag[1] = 0; |
1607 | 0 | } |
1608 | 0 | } |
1609 | 0 | } |
1610 | | |
1611 | | |
1612 | | void derive_luma_motion_merge_mode(base_context* ctx, |
1613 | | const slice_segment_header* shdr, |
1614 | | de265_image* img, |
1615 | | int xC,int yC, int xP,int yP, |
1616 | | int nCS, int nPbW,int nPbH, int partIdx, |
1617 | | int merge_idx, |
1618 | | PBMotion* out_vi) |
1619 | 1.99M | { |
1620 | 1.99M | PBMotion mergeCandList[5]; |
1621 | | |
1622 | 1.99M | get_merge_candidate_list_without_step_9(ctx, shdr, |
1623 | 1.99M | MotionVectorAccess_de265_image(img), img, |
1624 | 1.99M | xC,yC,xP,yP,nCS,nPbW,nPbH, partIdx, |
1625 | 1.99M | merge_idx, mergeCandList); |
1626 | | |
1627 | | |
1628 | 1.99M | *out_vi = mergeCandList[merge_idx]; |
1629 | | |
1630 | | // 8.5.3.1.1 / 9. |
1631 | | |
1632 | 1.99M | if (out_vi->predFlag[0] && out_vi->predFlag[1] && nPbW+nPbH==12) { |
1633 | 146k | out_vi->refIdx[1] = -1; |
1634 | 146k | out_vi->predFlag[1] = 0; |
1635 | 146k | } |
1636 | 1.99M | } |
1637 | | |
1638 | | |
1639 | | // 8.5.3.1.6 |
1640 | | void derive_spatial_luma_vector_prediction(base_context* ctx, |
1641 | | de265_image* img, |
1642 | | const slice_segment_header* shdr, |
1643 | | int xC,int yC,int nCS,int xP,int yP, |
1644 | | int nPbW,int nPbH, int X, |
1645 | | int refIdxLX, int partIdx, |
1646 | | uint8_t out_availableFlagLXN[2], |
1647 | | MotionVector out_mvLXN[2]) |
1648 | 978k | { |
1649 | 978k | if (refIdxLX >= MAX_NUM_REF_PICS) { |
1650 | 0 | ctx->add_warning(DE265_WARNING_INCORRECT_MOTION_VECTOR_SCALING, false); |
1651 | 0 | img->integrity = INTEGRITY_DECODING_ERRORS; |
1652 | |
|
1653 | 0 | out_availableFlagLXN[0] = false; |
1654 | 0 | out_availableFlagLXN[1] = false; |
1655 | 0 | out_mvLXN[0] = MotionVector(); |
1656 | 0 | out_mvLXN[1] = MotionVector(); |
1657 | 0 | return; |
1658 | 0 | } |
1659 | | |
1660 | 978k | int isScaledFlagLX = 0; |
1661 | | |
1662 | 978k | const int A=0; |
1663 | 978k | const int B=1; |
1664 | | |
1665 | 978k | out_availableFlagLXN[A] = 0; |
1666 | 978k | out_availableFlagLXN[B] = 0; |
1667 | | |
1668 | | |
1669 | | // --- A --- |
1670 | | |
1671 | | // 1. |
1672 | | |
1673 | 978k | int xA[2], yA[2]; |
1674 | 978k | xA[0] = xP-1; |
1675 | 978k | yA[0] = yP + nPbH; |
1676 | 978k | xA[1] = xA[0]; |
1677 | 978k | yA[1] = yA[0]-1; |
1678 | | |
1679 | | // 2. |
1680 | | |
1681 | 978k | out_availableFlagLXN[A] = 0; |
1682 | 978k | out_mvLXN[A].x = 0; |
1683 | 978k | out_mvLXN[A].y = 0; |
1684 | | |
1685 | | // 3. / 4. |
1686 | | |
1687 | 978k | bool availableA[2]; |
1688 | 978k | availableA[0] = img->available_pred_blk(xC,yC, nCS, xP,yP, nPbW,nPbH,partIdx, xA[0],yA[0]); |
1689 | 978k | availableA[1] = img->available_pred_blk(xC,yC, nCS, xP,yP, nPbW,nPbH,partIdx, xA[1],yA[1]); |
1690 | | |
1691 | | // 5. |
1692 | | |
1693 | 978k | if (availableA[0] || availableA[1]) { |
1694 | 784k | isScaledFlagLX = 1; |
1695 | 784k | } |
1696 | | |
1697 | | // 6. test A0 and A1 (Ak) |
1698 | | |
1699 | 978k | int refIdxA=-1; |
1700 | | |
1701 | | // the POC we want to reference in this PB |
1702 | 978k | const de265_image* tmpimg = ctx->get_image(shdr->RefPicList[X][ refIdxLX ]); |
1703 | 978k | if (tmpimg==NULL) { return; } |
1704 | 978k | const int referenced_POC = tmpimg->PicOrderCntVal; |
1705 | | |
1706 | 2.93M | for (int k=0;k<=1;k++) { |
1707 | | |
1708 | 1.95M | if (availableA[k] && |
1709 | 1.95M | out_availableFlagLXN[A]==0 && // no A?-predictor so far |
1710 | 1.95M | img->get_pred_mode(xA[k],yA[k]) != MODE_INTRA) { |
1711 | | |
1712 | 791k | int Y=1-X; |
1713 | | |
1714 | 791k | const PBMotion& vi = img->get_mv_info(xA[k],yA[k]); |
1715 | 791k | logtrace(LogMotion,"MVP A%d=\n",k); |
1716 | 791k | logmvcand(vi); |
1717 | | |
1718 | 791k | const de265_image* imgX = NULL; |
1719 | 791k | if (vi.predFlag[X]) { |
1720 | | // check for input data validity |
1721 | 763k | if (vi.refIdx[X]<0 || vi.refIdx[X] >= MAX_NUM_REF_PICS) { |
1722 | 0 | return; |
1723 | 0 | } |
1724 | | |
1725 | 763k | imgX = ctx->get_image(shdr->RefPicList[X][ vi.refIdx[X] ]); |
1726 | 763k | } |
1727 | | |
1728 | 791k | const de265_image* imgY = NULL; |
1729 | 791k | if (vi.predFlag[Y]) { |
1730 | | // check for input data validity |
1731 | 391k | if (vi.refIdx[Y]<0 || vi.refIdx[Y] >= MAX_NUM_REF_PICS) { |
1732 | 0 | return; |
1733 | 0 | } |
1734 | | |
1735 | 391k | imgY = ctx->get_image(shdr->RefPicList[Y][ vi.refIdx[Y] ]); |
1736 | 391k | } |
1737 | | |
1738 | | // check whether the predictor X is available and references the same POC |
1739 | 791k | if (vi.predFlag[X] && imgX && imgX->PicOrderCntVal == referenced_POC) { |
1740 | | |
1741 | 747k | logtrace(LogMotion,"take A%d/L%d as A candidate with same POC\n",k,X); |
1742 | | |
1743 | 747k | out_availableFlagLXN[A]=1; |
1744 | 747k | out_mvLXN[A] = vi.mv[X]; |
1745 | 747k | refIdxA = vi.refIdx[X]; |
1746 | 747k | } |
1747 | | // check whether the other predictor (Y) is available and references the same POC |
1748 | 43.7k | else if (vi.predFlag[Y] && imgY && imgY->PicOrderCntVal == referenced_POC) { |
1749 | | |
1750 | 19.5k | logtrace(LogMotion,"take A%d/L%d as A candidate with same POC\n",k,Y); |
1751 | | |
1752 | 19.5k | out_availableFlagLXN[A]=1; |
1753 | 19.5k | out_mvLXN[A] = vi.mv[Y]; |
1754 | 19.5k | refIdxA = vi.refIdx[Y]; |
1755 | 19.5k | } |
1756 | 791k | } |
1757 | 1.95M | } |
1758 | | |
1759 | | // 7. If there is no predictor referencing the same POC, we take any other reference as |
1760 | | // long as it is the same type of reference (long-term / short-term) |
1761 | | |
1762 | 1.39M | for (int k=0 ; k<=1 && out_availableFlagLXN[A]==0 ; k++) { |
1763 | 419k | int refPicList=-1; |
1764 | | |
1765 | 419k | if (availableA[k] && |
1766 | | // TODO: we could remove this call by storing the result of the similar computation above |
1767 | 419k | img->get_pred_mode(xA[k],yA[k]) != MODE_INTRA) { |
1768 | | |
1769 | 18.5k | int Y=1-X; |
1770 | | |
1771 | 18.5k | const PBMotion& vi = img->get_mv_info(xA[k],yA[k]); |
1772 | 18.5k | if (vi.predFlag[X]==1 && |
1773 | 18.5k | shdr->LongTermRefPic[X][refIdxLX] == shdr->LongTermRefPic[X][ vi.refIdx[X] ]) { |
1774 | | |
1775 | 6.33k | logtrace(LogMotion,"take A%D/L%d as A candidate with different POCs\n",k,X); |
1776 | | |
1777 | 6.33k | out_availableFlagLXN[A]=1; |
1778 | 6.33k | out_mvLXN[A] = vi.mv[X]; |
1779 | 6.33k | refIdxA = vi.refIdx[X]; |
1780 | 6.33k | refPicList = X; |
1781 | 6.33k | } |
1782 | 12.1k | else if (vi.predFlag[Y]==1 && |
1783 | 12.1k | shdr->LongTermRefPic[X][refIdxLX] == shdr->LongTermRefPic[Y][ vi.refIdx[Y] ]) { |
1784 | | |
1785 | 6.94k | logtrace(LogMotion,"take A%d/L%d as A candidate with different POCs\n",k,Y); |
1786 | | |
1787 | 6.94k | out_availableFlagLXN[A]=1; |
1788 | 6.94k | out_mvLXN[A] = vi.mv[Y]; |
1789 | 6.94k | refIdxA = vi.refIdx[Y]; |
1790 | 6.94k | refPicList = Y; |
1791 | 6.94k | } |
1792 | 18.5k | } |
1793 | | |
1794 | 419k | if (out_availableFlagLXN[A]==1) { |
1795 | 13.2k | if (refIdxA<0) { |
1796 | 0 | out_availableFlagLXN[0] = out_availableFlagLXN[1] = false; |
1797 | 0 | return; // error |
1798 | 0 | } |
1799 | | |
1800 | 13.2k | assert(refIdxA>=0); |
1801 | 13.2k | assert(refPicList>=0); |
1802 | | |
1803 | 13.2k | const de265_image* refPicA = ctx->get_image(shdr->RefPicList[refPicList][refIdxA ]); |
1804 | 13.2k | const de265_image* refPicX = ctx->get_image(shdr->RefPicList[X ][refIdxLX]); |
1805 | | |
1806 | | //int picStateA = shdr->RefPicList_PicState[refPicList][refIdxA ]; |
1807 | | //int picStateX = shdr->RefPicList_PicState[X ][refIdxLX]; |
1808 | | |
1809 | 13.2k | int isLongTermA = shdr->LongTermRefPic[refPicList][refIdxA ]; |
1810 | 13.2k | int isLongTermX = shdr->LongTermRefPic[X ][refIdxLX]; |
1811 | | |
1812 | 13.2k | logtrace(LogMotion,"scale MVP A: A-POC:%d X-POC:%d\n", |
1813 | 13.2k | refPicA->PicOrderCntVal,refPicX->PicOrderCntVal); |
1814 | | |
1815 | 13.2k | if (!isLongTermA && !isLongTermX) |
1816 | | /* |
1817 | | if (picStateA == UsedForShortTermReference && |
1818 | | picStateX == UsedForShortTermReference) |
1819 | | */ |
1820 | 11.6k | { |
1821 | 11.6k | int distA = img->PicOrderCntVal - refPicA->PicOrderCntVal; |
1822 | 11.6k | int distX = img->PicOrderCntVal - referenced_POC; |
1823 | | |
1824 | 11.6k | if (!scale_mv(&out_mvLXN[A], out_mvLXN[A], distA, distX)) { |
1825 | 1.90k | ctx->add_warning(DE265_WARNING_INCORRECT_MOTION_VECTOR_SCALING, false); |
1826 | 1.90k | img->integrity = INTEGRITY_DECODING_ERRORS; |
1827 | 1.90k | } |
1828 | 11.6k | } |
1829 | 13.2k | } |
1830 | 419k | } |
1831 | | |
1832 | | // --- B --- |
1833 | | |
1834 | | // 1. |
1835 | | |
1836 | 978k | int xB[3], yB[3]; |
1837 | 978k | xB[0] = xP+nPbW; |
1838 | 978k | yB[0] = yP-1; |
1839 | 978k | xB[1] = xB[0]-1; |
1840 | 978k | yB[1] = yP-1; |
1841 | 978k | xB[2] = xP-1; |
1842 | 978k | yB[2] = yP-1; |
1843 | | |
1844 | | // 2. |
1845 | | |
1846 | 978k | out_availableFlagLXN[B] = 0; |
1847 | 978k | out_mvLXN[B].x = 0; |
1848 | 978k | out_mvLXN[B].y = 0; |
1849 | | |
1850 | | // 3. test B0,B1,B2 (Bk) |
1851 | | |
1852 | 978k | int refIdxB=-1; |
1853 | | |
1854 | 978k | bool availableB[3]; |
1855 | 3.91M | for (int k=0;k<3;k++) { |
1856 | 2.93M | availableB[k] = img->available_pred_blk(xC,yC, nCS, xP,yP, nPbW,nPbH,partIdx, xB[k],yB[k]); |
1857 | | |
1858 | 2.93M | if (availableB[k] && out_availableFlagLXN[B]==0) { |
1859 | | |
1860 | 866k | int Y=1-X; |
1861 | | |
1862 | 866k | const PBMotion& vi = img->get_mv_info(xB[k],yB[k]); |
1863 | 866k | logtrace(LogMotion,"MVP B%d=\n",k); |
1864 | 866k | logmvcand(vi); |
1865 | | |
1866 | 866k | const de265_image* imgX = NULL; |
1867 | 866k | if (vi.predFlag[X]) { |
1868 | 826k | if (vi.refIdx[X] < 0 || vi.refIdx[X] >= MAX_NUM_REF_PICS) { |
1869 | 0 | return; |
1870 | 0 | } |
1871 | | |
1872 | 826k | imgX = ctx->get_image(shdr->RefPicList[X][ vi.refIdx[X] ]); |
1873 | 826k | } |
1874 | | |
1875 | 866k | const de265_image* imgY = NULL; |
1876 | 866k | if (vi.predFlag[Y]) { |
1877 | 388k | if (vi.refIdx[Y] < 0 || vi.refIdx[Y] >= MAX_NUM_REF_PICS) { |
1878 | 0 | return; |
1879 | 0 | } |
1880 | | |
1881 | 388k | imgY = ctx->get_image(shdr->RefPicList[Y][ vi.refIdx[Y] ]); |
1882 | 388k | } |
1883 | | |
1884 | 866k | if (vi.predFlag[X] && imgX && imgX->PicOrderCntVal == referenced_POC) { |
1885 | 795k | logtrace(LogMotion,"a) take B%d/L%d as B candidate with same POC\n",k,X); |
1886 | | |
1887 | 795k | out_availableFlagLXN[B]=1; |
1888 | 795k | out_mvLXN[B] = vi.mv[X]; |
1889 | 795k | refIdxB = vi.refIdx[X]; |
1890 | 795k | } |
1891 | 70.8k | else if (vi.predFlag[Y] && imgY && imgY->PicOrderCntVal == referenced_POC) { |
1892 | 25.6k | logtrace(LogMotion,"b) take B%d/L%d as B candidate with same POC\n",k,Y); |
1893 | | |
1894 | 25.6k | out_availableFlagLXN[B]=1; |
1895 | 25.6k | out_mvLXN[B] = vi.mv[Y]; |
1896 | 25.6k | refIdxB = vi.refIdx[Y]; |
1897 | 25.6k | } |
1898 | 866k | } |
1899 | 2.93M | } |
1900 | | |
1901 | | // 4. |
1902 | | |
1903 | 978k | if (isScaledFlagLX==0 && // no A predictor, |
1904 | 978k | out_availableFlagLXN[B]) // but an unscaled B predictor |
1905 | 188k | { |
1906 | | // use unscaled B predictor as A predictor |
1907 | | |
1908 | 188k | logtrace(LogMotion,"copy the same-POC B candidate as additional A candidate\n"); |
1909 | | |
1910 | 188k | out_availableFlagLXN[A]=1; |
1911 | 188k | out_mvLXN[A] = out_mvLXN[B]; |
1912 | 188k | refIdxA = refIdxB; |
1913 | 188k | } |
1914 | | |
1915 | | // 5. |
1916 | | |
1917 | | // If no A predictor, we output the unscaled B as the A predictor (above) |
1918 | | // and also add a scaled B predictor here. |
1919 | | // If there is (probably) an A predictor, no differing-POC B predictor is generated. |
1920 | 978k | if (isScaledFlagLX==0) { |
1921 | 194k | out_availableFlagLXN[B]=0; |
1922 | | |
1923 | 527k | for (int k=0 ; k<=2 && out_availableFlagLXN[B]==0 ; k++) { |
1924 | 333k | int refPicList=-1; |
1925 | | |
1926 | 333k | if (availableB[k]) { |
1927 | 191k | int Y=1-X; |
1928 | | |
1929 | 191k | const PBMotion& vi = img->get_mv_info(xB[k],yB[k]); |
1930 | | |
1931 | 191k | if (vi.refIdx[X] >= MAX_NUM_REF_PICS) { |
1932 | 0 | img->integrity = INTEGRITY_DECODING_ERRORS; |
1933 | 0 | ctx->add_warning(DE265_WARNING_NONEXISTING_REFERENCE_PICTURE_ACCESSED, false); |
1934 | 0 | return; // error // TODO: we actually should make sure that this is never set to an out-of-range value |
1935 | 0 | } |
1936 | | |
1937 | 191k | if (vi.predFlag[X]==1 && |
1938 | 191k | shdr->LongTermRefPic[X][refIdxLX] == shdr->LongTermRefPic[X][ vi.refIdx[X] ]) { |
1939 | 181k | out_availableFlagLXN[B]=1; |
1940 | 181k | out_mvLXN[B] = vi.mv[X]; |
1941 | 181k | refIdxB = vi.refIdx[X]; |
1942 | 181k | refPicList = X; |
1943 | 181k | } |
1944 | 9.66k | else if (vi.predFlag[Y]==1 && |
1945 | 9.66k | shdr->LongTermRefPic[X][refIdxLX] == shdr->LongTermRefPic[Y][ vi.refIdx[Y] ]) { |
1946 | 8.47k | out_availableFlagLXN[B]=1; |
1947 | 8.47k | out_mvLXN[B] = vi.mv[Y]; |
1948 | 8.47k | refIdxB = vi.refIdx[Y]; |
1949 | 8.47k | refPicList = Y; |
1950 | 8.47k | } |
1951 | 191k | } |
1952 | | |
1953 | 333k | if (out_availableFlagLXN[B]==1) { |
1954 | 190k | if (refIdxB<0) { |
1955 | 0 | out_availableFlagLXN[0] = out_availableFlagLXN[1] = false; |
1956 | 0 | return; // error |
1957 | 0 | } |
1958 | | |
1959 | 190k | assert(refPicList>=0); |
1960 | 190k | assert(refIdxB>=0); |
1961 | | |
1962 | 190k | const de265_image* refPicB=ctx->get_image(shdr->RefPicList[refPicList][refIdxB ]); |
1963 | 190k | const de265_image* refPicX=ctx->get_image(shdr->RefPicList[X ][refIdxLX]); |
1964 | | |
1965 | 190k | int isLongTermB = shdr->LongTermRefPic[refPicList][refIdxB ]; |
1966 | 190k | int isLongTermX = shdr->LongTermRefPic[X ][refIdxLX]; |
1967 | | |
1968 | 190k | if (refPicB==NULL || refPicX==NULL) { |
1969 | 0 | img->decctx->add_warning(DE265_WARNING_NONEXISTING_REFERENCE_PICTURE_ACCESSED,false); |
1970 | 0 | img->integrity = INTEGRITY_DECODING_ERRORS; |
1971 | 0 | } |
1972 | 190k | else if (refPicB->PicOrderCntVal != refPicX->PicOrderCntVal && |
1973 | 190k | !isLongTermB && !isLongTermX) { |
1974 | 1.90k | int distB = img->PicOrderCntVal - refPicB->PicOrderCntVal; |
1975 | 1.90k | int distX = img->PicOrderCntVal - referenced_POC; |
1976 | | |
1977 | 1.90k | logtrace(LogMotion,"scale MVP B: B-POC:%d X-POC:%d\n",refPicB->PicOrderCntVal,refPicX->PicOrderCntVal); |
1978 | | |
1979 | 1.90k | if (!scale_mv(&out_mvLXN[B], out_mvLXN[B], distB, distX)) { |
1980 | 315 | ctx->add_warning(DE265_WARNING_INCORRECT_MOTION_VECTOR_SCALING, false); |
1981 | 315 | img->integrity = INTEGRITY_DECODING_ERRORS; |
1982 | 315 | } |
1983 | 1.90k | } |
1984 | 190k | } |
1985 | 333k | } |
1986 | 194k | } |
1987 | 978k | } |
1988 | | |
1989 | | // Fills out_mvpList with exactly two luma MVP candidates for list l: spatial A, spatial B (if distinct from A), temporal, then zero vectors as padding.
1990 | | // 8.5.3.1.5
1991 | | void fill_luma_motion_vector_predictors(base_context* ctx,
1992 | | const slice_segment_header* shdr,
1993 | | de265_image* img,
1994 | | int xC,int yC,int nCS,int xP,int yP,
1995 | | int nPbW,int nPbH, int l,
1996 | | int refIdx, int partIdx,
1997 | | MotionVector out_mvpList[2])
1998 | 978k | {
1999 | | // 8.5.3.1.6: derive two spatial vector predictors A (0) and B (1)
2000 | | 
2001 | 978k | uint8_t availableFlagLXN[2]; // [0]: candidate A is available, [1]: candidate B is available
2002 | 978k | MotionVector mvLXN[2]; // [0]: spatial candidate A, [1]: spatial candidate B
2003 | | 
2004 | 978k | derive_spatial_luma_vector_prediction(ctx, img, shdr, xC,yC, nCS, xP,yP,
2005 | 978k | nPbW,nPbH, l, refIdx, partIdx,
2006 | 978k | availableFlagLXN, mvLXN);
2007 | | 
2008 | | // 8.5.3.1.7: if we only have one spatial vector or both spatial vectors are the same,
2009 | | // derive a temporal predictor
2010 | | 
2011 | 978k | uint8_t availableFlagLXCol; // set in both branches below
2012 | 978k | MotionVector mvLXCol; // only read when availableFlagLXCol is non-zero
2013 | | 
2014 | | 
2015 | 978k | if (availableFlagLXN[0] &&
2016 | 978k | availableFlagLXN[1] &&
2017 | 978k | (mvLXN[0].x != mvLXN[1].x || mvLXN[0].y != mvLXN[1].y)) {
2018 | 305k | availableFlagLXCol = 0; // two distinct spatial candidates already fill the list
2019 | 305k | }
2020 | 672k | else {
2021 | 672k | derive_temporal_luma_vector_prediction(ctx, img, shdr,
2022 | 672k | xP,yP, nPbW,nPbH, refIdx,l,
2023 | 672k | &mvLXCol, &availableFlagLXCol);
2024 | 672k | }
2025 | | 
2026 | | 
2027 | | // --- build candidate vector list with exactly two entries ---
2028 | | 
2029 | 978k | int numMVPCandLX=0; // number of candidates written to out_mvpList so far
2030 | | 
2031 | | // spatial predictor A
2032 | | 
2033 | 978k | if (availableFlagLXN[0])
2034 | 968k | {
2035 | 968k | out_mvpList[numMVPCandLX++] = mvLXN[0];
2036 | 968k | }
2037 | | 
2038 | | // spatial predictor B (if not same as A)
2039 | | 
2040 | 978k | if (availableFlagLXN[1] &&
2041 | 978k | (!availableFlagLXN[0] || // in case A is not available, but mvLXA was initialized to the same value as mvLXB
2042 | 823k | (mvLXN[0].x != mvLXN[1].x || mvLXN[0].y != mvLXN[1].y)))
2043 | 309k | {
2044 | 309k | out_mvpList[numMVPCandLX++] = mvLXN[1];
2045 | 309k | }
2046 | | 
2047 | | // temporal predictor
2048 | | 
2049 | 978k | if (availableFlagLXCol)
2050 | 29.7k | {
2051 | 29.7k | out_mvpList[numMVPCandLX++] = mvLXCol;
2052 | 29.7k | }
2053 | | 
2054 | | // fill the remaining entries with zero predictors
2055 | | 
2056 | 1.62M | while (numMVPCandLX<2) {
2057 | 649k | out_mvpList[numMVPCandLX].x = 0;
2058 | 649k | out_mvpList[numMVPCandLX].y = 0;
2059 | 649k | numMVPCandLX++;
2060 | 649k | }
2061 | | 
2062 | | 
2063 | 978k | assert(numMVPCandLX==2); // the temporal candidate is only requested when fewer than two distinct spatial ones exist
2064 | 978k | }
2065 | | |
2066 | | // Returns the luma motion vector predictor for list l: builds the two-entry candidate list and picks the entry indexed by the coded mvp flag of that list.
2067 | | MotionVector luma_motion_vector_prediction(base_context* ctx,
2068 | | const slice_segment_header* shdr,
2069 | | de265_image* img,
2070 | | const PBMotionCoding& motion,
2071 | | int xC,int yC,int nCS,int xP,int yP,
2072 | | int nPbW,int nPbH, int l,
2073 | | int refIdx, int partIdx)
2074 | 978k | {
2075 | 978k | MotionVector mvpList[2]; // both entries are filled by the call below
2076 | | 
2077 | 978k | fill_luma_motion_vector_predictors(ctx, shdr, img,
2078 | 978k | xC,yC,nCS,xP,yP,
2079 | 978k | nPbW, nPbH, l, refIdx, partIdx,
2080 | 978k | mvpList);
2081 | | 
2082 | | // select predictor according to mvp_lX_flag (mvp_l1_flag when l is non-zero, mvp_l0_flag otherwise)
2083 | | 
2084 | 978k | return mvpList[ l ? motion.mvp_l1_flag : motion.mvp_l0_flag ];
2085 | 978k | }
2086 | | |
2087 | | // Trace helper: logs position, size, mode string and, per list, the prediction flag, motion vector and reference index of one PB (zeros are printed for an unused list).
2088 | | #if DE265_LOG_TRACE
2089 | | void logMV(int x0,int y0,int nPbW,int nPbH, const char* mode,const PBMotion* mv)
2090 | | {
2091 | | int pred0 = mv->predFlag[0];
2092 | | int pred1 = mv->predFlag[1];
2093 | | 
2094 | | logtrace(LogMotion,
2095 | | "*MV %d;%d [%d;%d] %s: (%d) %d;%d @%d (%d) %d;%d @%d\n", x0,y0,nPbW,nPbH,mode,
2096 | | pred0,
2097 | | pred0 ? mv->mv[0].x : 0,pred0 ? mv->mv[0].y : 0, pred0 ? mv->refIdx[0] : 0,
2098 | | pred1,
2099 | | pred1 ? mv->mv[1].x : 0,pred1 ? mv->mv[1].y : 0, pred1 ? mv->refIdx[1] : 0);
2100 | | }
2101 | | #else // tracing disabled: logMV() expands to nothing
2102 | | #define logMV(x0,y0,nPbW,nPbH,mode,mv)
2103 | | #endif
2104 | | |
2105 | | |
2106 | | // Derives the motion data (predFlag, refIdx, mv per list) of one PB into out_vi: via merge mode for skipped/merged blocks, otherwise from the coded refIdx plus predictor + coded difference.
2107 | | // 8.5.3.1
2108 | | void motion_vectors_and_ref_indices(base_context* ctx,
2109 | | const slice_segment_header* shdr,
2110 | | de265_image* img,
2111 | | const PBMotionCoding& motion,
2112 | | int xC,int yC, int xB,int yB, int nCS, int nPbW,int nPbH,
2113 | | int partIdx,
2114 | | PBMotion* out_vi)
2115 | 2.76M | {
2116 | | //slice_segment_header* shdr = tctx->shdr;
2117 | | 
2118 | 2.76M | int xP = xC+xB; // PB position: CB position plus the PB offset
2119 | 2.76M | int yP = yC+yB;
2120 | | 
2121 | 2.76M | enum PredMode predMode = img->get_pred_mode(xC,yC);
2122 | | 
2123 | 2.76M | if (predMode == MODE_SKIP ||
2124 | 2.76M | (predMode == MODE_INTER && motion.merge_flag))
2125 | 1.99M | {
2126 | 1.99M | derive_luma_motion_merge_mode(ctx,shdr,img,
2127 | 1.99M | xC,yC, xP,yP, nCS,nPbW,nPbH, partIdx,
2128 | 1.99M | motion.merge_idx, out_vi);
2129 | | 
2130 | 1.99M | logMV(xP,yP,nPbW,nPbH, "merge_mode", out_vi);
2131 | 1.99M | }
2132 | 765k | else {
2133 | 765k | int mvdL[2][2]; // per list: coded motion vector difference, [l][0]=x, [l][1]=y
2134 | 765k | MotionVector mvpL[2]; // per list: selected motion vector predictor
2135 | | 
2136 | 2.29M | for (int l=0;l<2;l++) {
2137 | | // 1. set reference index and prediction flag of list l according to inter_pred_idc
2138 | | 
2139 | 1.53M | enum InterPredIdc inter_pred_idc = (enum InterPredIdc)motion.inter_pred_idc;
2140 | | 
2141 | 1.53M | if (inter_pred_idc == PRED_BI ||
2142 | 1.53M | (inter_pred_idc == PRED_L0 && l==0) ||
2143 | 1.53M | (inter_pred_idc == PRED_L1 && l==1)) {
2144 | 978k | out_vi->refIdx[l] = motion.refIdx[l];
2145 | 978k | out_vi->predFlag[l] = 1;
2146 | | 
2147 | 978k | if (motion.refIdx[l] >= MAX_NUM_REF_PICS) { // out-of-range reference index: flag a decoding error and abort
2148 | 0 | out_vi->refIdx[l] = 0;
2149 |

2150 | 0 | img->integrity = INTEGRITY_DECODING_ERRORS;
2151 | 0 | ctx->add_warning(DE265_WARNING_NONEXISTING_REFERENCE_PICTURE_ACCESSED, false);
2152 | 0 | return; // NOTE(review): out_vi->mv[l] (and all list-1 fields when l==0) have not been written here -- confirm callers tolerate this
2153 | 0 | }
2154 | 978k | }
2155 | 551k | else {
2156 | 551k | out_vi->refIdx[l] = -1; // list l is not used for this PB
2157 | 551k | out_vi->predFlag[l] = 0;
2158 | 551k | }
2159 | | 
2160 | | // 2. copy the coded motion vector difference of list l
2161 | | 
2162 | 1.53M | mvdL[l][0] = motion.mvd[l][0];
2163 | 1.53M | mvdL[l][1] = motion.mvd[l][1];
2164 | | 
2165 | | 
2166 | 1.53M | if (out_vi->predFlag[l]) {
2167 | | // 3. get the motion vector predictor selected for list l
2168 | | 
2169 | 978k | mvpL[l] = luma_motion_vector_prediction(ctx,shdr,img,motion,
2170 | 978k | xC,yC,nCS,xP,yP, nPbW,nPbH, l,
2171 | 978k | out_vi->refIdx[l], partIdx);
2172 | | 
2173 | | // 4. add predictor and difference, keeping only the low 16 bits of the sum
2174 | | 
2175 | 978k | int32_t x = (mvpL[l].x + mvdL[l][0] + 0x10000) & 0xFFFF;
2176 | 978k | int32_t y = (mvpL[l].y + mvdL[l][1] + 0x10000) & 0xFFFF;
2177 | | 
2178 | 978k | out_vi->mv[l].x = (x>=0x8000) ? x-0x10000 : x; // reinterpret the 16-bit value as signed (-32768..32767)
2179 | 978k | out_vi->mv[l].y = (y>=0x8000) ? y-0x10000 : y;
2180 | 978k | }
2181 | 1.53M | }
2182 | | 
2183 | 765k | logMV(xP,yP,nPbW,nPbH, "mvp", out_vi);
2184 | 765k | }
2185 | 2.76M | }
2186 | | |
2187 | | |
2188 | | // 8.5.3
2189 | | // Decodes one prediction block: derives its motion data, generates the inter prediction samples, then stores the motion data in the image.
2190 | | /* xC/yC : CB position
2191 | | xB/yB : position offset of the PB (added to xC/yC to get the PB position)
2192 | | nPbW/nPbH : size of PB
2193 | | nCS : CB size
2194 | | partIdx : forwarded unchanged to motion_vectors_and_ref_indices() */
2195 | | void decode_prediction_unit(base_context* ctx,
2196 | | const slice_segment_header* shdr,
2197 | | de265_image* img,
2198 | | const PBMotionCoding& motion,
2199 | | int xC,int yC, int xB,int yB, int nCS, int nPbW,int nPbH, int partIdx)
2200 | 2.76M | {
2201 | 2.76M | logtrace(LogMotion,"decode_prediction_unit POC=%d %d;%d %dx%d\n",
2202 | 2.76M | img->PicOrderCntVal, xC+xB,yC+yB, nPbW,nPbH);
2203 | | 
2204 | | //slice_segment_header* shdr = tctx->shdr;
2205 | | 
2206 | | // 1. derive motion vectors and reference indices of this PB
2207 | | 
2208 | 2.76M | PBMotion vi;
2209 | 2.76M | motion_vectors_and_ref_indices(ctx, shdr, img, motion,
2210 | 2.76M | xC,yC, xB,yB, nCS, nPbW,nPbH, partIdx, &vi);
2211 | | 
2212 | | // 2. generate the inter prediction samples from that motion data
2213 | | 
2214 | 2.76M | generate_inter_prediction_samples(ctx,shdr, img, xC,yC, xB,yB, nCS, nPbW,nPbH, &vi);
2215 | | 
2216 | | // record the motion data for the PB area (presumably what get_mv_info() returns to later neighbouring blocks -- confirm)
2217 | 2.76M | img->set_mv_info(xC+xB,yC+yB,nPbW,nPbH, vi);
2218 | 2.76M | }
2200 | 2.76M | { |
2201 | 2.76M | logtrace(LogMotion,"decode_prediction_unit POC=%d %d;%d %dx%d\n", |
2202 | 2.76M | img->PicOrderCntVal, xC+xB,yC+yB, nPbW,nPbH); |
2203 | | |
2204 | | //slice_segment_header* shdr = tctx->shdr; |
2205 | | |
2206 | | // 1. |
2207 | | |
2208 | 2.76M | PBMotion vi; |
2209 | 2.76M | motion_vectors_and_ref_indices(ctx, shdr, img, motion, |
2210 | 2.76M | xC,yC, xB,yB, nCS, nPbW,nPbH, partIdx, &vi); |
2211 | | |
2212 | | // 2. |
2213 | | |
2214 | 2.76M | generate_inter_prediction_samples(ctx,shdr, img, xC,yC, xB,yB, nCS, nPbW,nPbH, &vi); |
2215 | | |
2216 | | |
2217 | 2.76M | img->set_mv_info(xC+xB,yC+yB,nPbW,nPbH, vi); |
2218 | 2.76M | } |