/src/ffmpeg/libavcodec/motion_est_template.c
Line | Count | Source |
1 | | /* |
2 | | * Motion estimation |
3 | | * Copyright (c) 2002-2004 Michael Niedermayer |
4 | | * |
5 | | * This file is part of FFmpeg. |
6 | | * |
7 | | * FFmpeg is free software; you can redistribute it and/or |
8 | | * modify it under the terms of the GNU Lesser General Public |
9 | | * License as published by the Free Software Foundation; either |
10 | | * version 2.1 of the License, or (at your option) any later version. |
11 | | * |
12 | | * FFmpeg is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | | * Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public |
18 | | * License along with FFmpeg; if not, write to the Free Software |
19 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | | */ |
21 | | |
22 | | /** |
23 | | * @file |
24 | | * Motion estimation template. |
25 | | */ |
26 | | |
27 | | #include "libavutil/qsort.h" |
28 | | #include "mpegvideoenc.h" |
29 | | |
30 | | //Let us hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...) |
/*
 * Declares, in the enclosing scope, local copies of the MotionEstContext
 * fields that the search routines and CHECK_* macros of this file refer to
 * by name: score_map, xmin/ymin/xmax/ymax, mv_penalty, pred_x and pred_y.
 * A variable named `c` pointing to the context must be in scope.
 * The first five are tagged av_unused because not every user reads them.
 */
#define LOAD_COMMON\
    av_unused uint32_t * const score_map= c->score_map;\
    av_unused const int xmin= c->xmin;\
    av_unused const int ymin= c->ymin;\
    av_unused const int xmax= c->xmax;\
    av_unused const int ymax= c->ymax;\
    const uint8_t *mv_penalty = c->current_mv_penalty; \
    const int pred_x= c->pred_x;\
    const int pred_y= c->pred_y;\
40 | | |
/*
 * Score the half-pel position (2*x + dx, 2*y + dy) with cmp_hpel(), add the
 * vector penalty and, if the total beats dmin, store it in dmin/bx/by.
 * Expects d, dmin, bx, by, s, size, h, ref_index, src_index, cmp_sub,
 * chroma_cmp_sub, flags, mv_penalty, pred_x, pred_y and penalty_factor
 * in the enclosing scope. Expands to a braced block; call sites add no ';'.
 */
#define CHECK_HALF_MV(dx, dy, x, y)\
{\
    const int hpel_x = 2 * (x) + (dx);\
    const int hpel_y = 2 * (y) + (dy);\
    d  = cmp_hpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);\
    d += (mv_penalty[hpel_x - pred_x] + mv_penalty[hpel_y - pred_y]) * penalty_factor;\
    COPY3_IF_LT(dmin, d, bx, hpel_x, by, hpel_y)\
}
49 | | |
50 | | static int hpel_motion_search(MPVEncContext *const s, |
51 | | int *mx_ptr, int *my_ptr, int dmin, |
52 | | int src_index, int ref_index, |
53 | | int size, int h) |
54 | 0 | { |
55 | 0 | MotionEstContext *const c = &s->me; |
56 | 0 | const int mx = *mx_ptr; |
57 | 0 | const int my = *my_ptr; |
58 | 0 | const int penalty_factor= c->sub_penalty_factor; |
59 | 0 | me_cmp_func cmp_sub, chroma_cmp_sub; |
60 | 0 | int bx=2*mx, by=2*my; |
61 | |
|
62 | 0 | LOAD_COMMON |
63 | 0 | int flags= c->sub_flags; |
64 | | |
65 | | //FIXME factorize |
66 | |
|
67 | 0 | cmp_sub = c->me_sub_cmp[size]; |
68 | 0 | chroma_cmp_sub = c->me_sub_cmp[size + 1]; |
69 | |
|
70 | 0 | if(c->skip){ //FIXME move out of hpel? |
71 | 0 | *mx_ptr = 0; |
72 | 0 | *my_ptr = 0; |
73 | 0 | return dmin; |
74 | 0 | } |
75 | | |
76 | 0 | if(c->avctx->me_cmp != c->avctx->me_sub_cmp){ |
77 | 0 | dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags); |
78 | 0 | if(mx || my || size>0) |
79 | 0 | dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor; |
80 | 0 | } |
81 | |
|
82 | 0 | if (mx > xmin && mx < xmax && |
83 | 0 | my > ymin && my < ymax) { |
84 | 0 | int d= dmin; |
85 | 0 | const int index = my * (1 << ME_MAP_SHIFT) + mx; |
86 | 0 | const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] |
87 | 0 | + (mv_penalty[bx - pred_x] + mv_penalty[by-2 - pred_y])*c->penalty_factor; |
88 | 0 | const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)] |
89 | 0 | + (mv_penalty[bx-2 - pred_x] + mv_penalty[by - pred_y])*c->penalty_factor; |
90 | 0 | const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)] |
91 | 0 | + (mv_penalty[bx+2 - pred_x] + mv_penalty[by - pred_y])*c->penalty_factor; |
92 | 0 | const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] |
93 | 0 | + (mv_penalty[bx - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor; |
94 | |
|
95 | | #if defined(ASSERT_LEVEL) && ASSERT_LEVEL > 1 |
96 | | unsigned key; |
97 | | unsigned map_generation= c->map_generation; |
98 | | key = (my - 1) * (1 << ME_MAP_MV_BITS) + (mx) + map_generation; |
99 | | av_assert2(c->map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key); |
100 | | key = (my + 1) * (1 << ME_MAP_MV_BITS) + (mx) + map_generation; |
101 | | av_assert2(c->map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key); |
102 | | key = (my) * (1 << ME_MAP_MV_BITS) + (mx + 1) + map_generation; |
103 | | av_assert2(c->map[(index+1)&(ME_MAP_SIZE-1)] == key); |
104 | | key = (my) * (1 << ME_MAP_MV_BITS) + (mx - 1) + map_generation; |
105 | | av_assert2(c->map[(index-1)&(ME_MAP_SIZE-1)] == key); |
106 | | #endif |
107 | 0 | if(t<=b){ |
108 | 0 | CHECK_HALF_MV(0, 1, mx ,my-1) |
109 | 0 | if(l<=r){ |
110 | 0 | CHECK_HALF_MV(1, 1, mx-1, my-1) |
111 | 0 | if(t+r<=b+l){ |
112 | 0 | CHECK_HALF_MV(1, 1, mx , my-1) |
113 | 0 | }else{ |
114 | 0 | CHECK_HALF_MV(1, 1, mx-1, my ) |
115 | 0 | } |
116 | 0 | CHECK_HALF_MV(1, 0, mx-1, my ) |
117 | 0 | }else{ |
118 | 0 | CHECK_HALF_MV(1, 1, mx , my-1) |
119 | 0 | if(t+l<=b+r){ |
120 | 0 | CHECK_HALF_MV(1, 1, mx-1, my-1) |
121 | 0 | }else{ |
122 | 0 | CHECK_HALF_MV(1, 1, mx , my ) |
123 | 0 | } |
124 | 0 | CHECK_HALF_MV(1, 0, mx , my ) |
125 | 0 | } |
126 | 0 | }else{ |
127 | 0 | if(l<=r){ |
128 | 0 | if(t+l<=b+r){ |
129 | 0 | CHECK_HALF_MV(1, 1, mx-1, my-1) |
130 | 0 | }else{ |
131 | 0 | CHECK_HALF_MV(1, 1, mx , my ) |
132 | 0 | } |
133 | 0 | CHECK_HALF_MV(1, 0, mx-1, my) |
134 | 0 | CHECK_HALF_MV(1, 1, mx-1, my) |
135 | 0 | }else{ |
136 | 0 | if(t+r<=b+l){ |
137 | 0 | CHECK_HALF_MV(1, 1, mx , my-1) |
138 | 0 | }else{ |
139 | 0 | CHECK_HALF_MV(1, 1, mx-1, my) |
140 | 0 | } |
141 | 0 | CHECK_HALF_MV(1, 0, mx , my) |
142 | 0 | CHECK_HALF_MV(1, 1, mx , my) |
143 | 0 | } |
144 | 0 | CHECK_HALF_MV(0, 1, mx , my) |
145 | 0 | } |
146 | 0 | av_assert2(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2); |
147 | 0 | } |
148 | |
|
149 | 0 | *mx_ptr = bx; |
150 | 0 | *my_ptr = by; |
151 | |
|
152 | 0 | return dmin; |
153 | 0 | } |
154 | | |
155 | | static int no_sub_motion_search(MPVEncContext *const s, |
156 | | int *mx_ptr, int *my_ptr, int dmin, |
157 | | int src_index, int ref_index, |
158 | | int size, int h) |
159 | 0 | { |
160 | 0 | (*mx_ptr) *= 2; |
161 | 0 | (*my_ptr) *= 2; |
162 | 0 | return dmin; |
163 | 0 | } |
164 | | |
165 | | static inline int get_mb_score(MPVEncContext *const s, int mx, int my, |
166 | | int src_index, int ref_index, int size, |
167 | | int h, int add_rate) |
168 | 408k | { |
169 | 408k | MotionEstContext *const c = &s->me; |
170 | 408k | const int penalty_factor= c->mb_penalty_factor; |
171 | 408k | const int flags= c->mb_flags; |
172 | 408k | const int qpel= flags & FLAG_QPEL; |
173 | 408k | const int mask= 1+2*qpel; |
174 | 408k | me_cmp_func cmp_sub, chroma_cmp_sub; |
175 | 408k | int d; |
176 | | |
177 | 408k | LOAD_COMMON |
178 | | |
179 | | //FIXME factorize |
180 | | |
181 | 408k | cmp_sub = c->mb_cmp[size]; |
182 | 408k | chroma_cmp_sub = c->mb_cmp[size + 1]; |
183 | | |
184 | 408k | d= cmp(s, mx>>(qpel+1), my>>(qpel+1), mx&mask, my&mask, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags); |
185 | | //FIXME check cbp before adding penalty for (0,0) vector |
186 | 408k | if(add_rate && (mx || my || size>0)) |
187 | 0 | d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor; |
188 | | |
189 | 408k | return d; |
190 | 408k | } |
191 | | |
192 | | int ff_get_mb_score(MPVEncContext *const s, int mx, int my, int src_index, |
193 | | int ref_index, int size, int h, int add_rate) |
194 | 408k | { |
195 | 408k | return get_mb_score(s, mx, my, src_index, ref_index, size, h, add_rate); |
196 | 408k | } |
197 | | |
/*
 * Score the quarter-pel position (4*x + dx, 4*y + dy) with cmp_qpel(), add
 * the vector penalty and, if the total beats dmin, store it in dmin/bx/by.
 * Expects d, dmin, bx, by, s, size, h, ref_index, src_index, cmpf,
 * chroma_cmpf, flags, mv_penalty, pred_x, pred_y and penalty_factor in the
 * enclosing scope. Expands to a braced block; call sites add no ';'.
 */
#define CHECK_QUARTER_MV(dx, dy, x, y)\
{\
    const int qpel_x = 4 * (x) + (dx);\
    const int qpel_y = 4 * (y) + (dy);\
    d  = cmp_qpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
    d += (mv_penalty[qpel_x - pred_x] + mv_penalty[qpel_y - pred_y]) * penalty_factor;\
    COPY3_IF_LT(dmin, d, bx, qpel_x, by, qpel_y)\
}
206 | | |
207 | | static int qpel_motion_search(MPVEncContext *const s, |
208 | | int *mx_ptr, int *my_ptr, int dmin, |
209 | | int src_index, int ref_index, |
210 | | int size, int h) |
211 | 0 | { |
212 | 0 | MotionEstContext *const c = &s->me; |
213 | 0 | const int mx = *mx_ptr; |
214 | 0 | const int my = *my_ptr; |
215 | 0 | const int penalty_factor= c->sub_penalty_factor; |
216 | 0 | const unsigned map_generation = c->map_generation; |
217 | 0 | const int subpel_quality= c->avctx->me_subpel_quality; |
218 | 0 | uint32_t *map= c->map; |
219 | 0 | me_cmp_func cmpf, chroma_cmpf; |
220 | 0 | me_cmp_func cmp_sub, chroma_cmp_sub; |
221 | |
|
222 | 0 | LOAD_COMMON |
223 | 0 | int flags= c->sub_flags; |
224 | |
|
225 | 0 | cmpf = c->me_cmp[size]; |
226 | 0 | chroma_cmpf = c->me_cmp[size + 1]; // FIXME: factorize |
227 | | //FIXME factorize |
228 | |
|
229 | 0 | cmp_sub = c->me_sub_cmp[size]; |
230 | 0 | chroma_cmp_sub = c->me_sub_cmp[size + 1]; |
231 | |
|
232 | 0 | if(c->skip){ //FIXME somehow move up (benchmark) |
233 | 0 | *mx_ptr = 0; |
234 | 0 | *my_ptr = 0; |
235 | 0 | return dmin; |
236 | 0 | } |
237 | | |
238 | 0 | if(c->avctx->me_cmp != c->avctx->me_sub_cmp){ |
239 | 0 | dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags); |
240 | 0 | if(mx || my || size>0) |
241 | 0 | dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor; |
242 | 0 | } |
243 | |
|
244 | 0 | if (mx > xmin && mx < xmax && |
245 | 0 | my > ymin && my < ymax) { |
246 | 0 | int bx=4*mx, by=4*my; |
247 | 0 | int d= dmin; |
248 | 0 | int i, nx, ny; |
249 | 0 | const int index = my * (1 << ME_MAP_SHIFT) + mx; |
250 | 0 | const int t= score_map[(index-(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)]; |
251 | 0 | const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)]; |
252 | 0 | const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)]; |
253 | 0 | const int b= score_map[(index+(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)]; |
254 | 0 | const int c= score_map[(index )&(ME_MAP_SIZE-1)]; |
255 | 0 | int best[8]; |
256 | 0 | int best_pos[8][2]; |
257 | |
|
258 | 0 | memset(best, 64, sizeof(int)*8); |
259 | 0 | if(s->me.dia_size>=2){ |
260 | 0 | const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)]; |
261 | 0 | const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)]; |
262 | 0 | const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)]; |
263 | 0 | const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)]; |
264 | |
|
265 | 0 | for(ny= -3; ny <= 3; ny++){ |
266 | 0 | for(nx= -3; nx <= 3; nx++){ |
267 | | //FIXME this could overflow (unlikely though) |
268 | 0 | const int64_t t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t; |
269 | 0 | const int64_t c2= nx*nx*( r + l - 2*c) + 4*nx*( r- l) + 32*c; |
270 | 0 | const int64_t b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b; |
271 | 0 | int score= (ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2 + 512)>>10; |
272 | 0 | int i; |
273 | |
|
274 | 0 | if((nx&3)==0 && (ny&3)==0) continue; |
275 | | |
276 | 0 | score += (mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor; |
277 | | |
278 | | // if(nx&1) score-=1024*c->penalty_factor; |
279 | | // if(ny&1) score-=1024*c->penalty_factor; |
280 | |
|
281 | 0 | for(i=0; i<8; i++){ |
282 | 0 | if(score < best[i]){ |
283 | 0 | memmove(&best[i+1], &best[i], sizeof(int)*(7-i)); |
284 | 0 | memmove(&best_pos[i + 1], &best_pos[i], sizeof(*best_pos) * (7 - i)); |
285 | 0 | best[i]= score; |
286 | 0 | best_pos[i][0]= nx + 4*mx; |
287 | 0 | best_pos[i][1]= ny + 4*my; |
288 | 0 | break; |
289 | 0 | } |
290 | 0 | } |
291 | 0 | } |
292 | 0 | } |
293 | 0 | }else{ |
294 | 0 | int tl; |
295 | | //FIXME this could overflow (unlikely though) |
296 | 0 | const int cx = 4*(r - l); |
297 | 0 | const int cx2= r + l - 2*c; |
298 | 0 | const int cy = 4*(b - t); |
299 | 0 | const int cy2= b + t - 2*c; |
300 | 0 | int cxy; |
301 | |
|
302 | 0 | if (map[(index - (1 << ME_MAP_SHIFT) - 1) & (ME_MAP_SIZE - 1)] == |
303 | 0 | (my - 1) * (1 << ME_MAP_MV_BITS) + (mx - 1) + map_generation) { |
304 | 0 | tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)]; |
305 | 0 | }else{ |
306 | 0 | tl= cmp(s, mx-1, my-1, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);//FIXME wrong if chroma me is different |
307 | 0 | } |
308 | |
|
309 | 0 | cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c; |
310 | |
|
311 | 0 | av_assert2(16*cx2 + 4*cx + 32*c == 32*r); |
312 | 0 | av_assert2(16*cx2 - 4*cx + 32*c == 32*l); |
313 | 0 | av_assert2(16*cy2 + 4*cy + 32*c == 32*b); |
314 | 0 | av_assert2(16*cy2 - 4*cy + 32*c == 32*t); |
315 | 0 | av_assert2(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl); |
316 | |
|
317 | 0 | for(ny= -3; ny <= 3; ny++){ |
318 | 0 | for(nx= -3; nx <= 3; nx++){ |
319 | | //FIXME this could overflow (unlikely though) |
320 | 0 | int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c; //FIXME factor |
321 | 0 | int i; |
322 | |
|
323 | 0 | if((nx&3)==0 && (ny&3)==0) continue; |
324 | | |
325 | 0 | score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor; |
326 | | // if(nx&1) score-=32*c->penalty_factor; |
327 | | // if(ny&1) score-=32*c->penalty_factor; |
328 | |
|
329 | 0 | for(i=0; i<8; i++){ |
330 | 0 | if(score < best[i]){ |
331 | 0 | memmove(&best[i+1], &best[i], sizeof(int)*(7-i)); |
332 | 0 | memmove(best_pos[i + 1], best_pos[i], sizeof(best_pos[0]) * (7 - i)); |
333 | 0 | best[i]= score; |
334 | 0 | best_pos[i][0]= nx + 4*mx; |
335 | 0 | best_pos[i][1]= ny + 4*my; |
336 | 0 | break; |
337 | 0 | } |
338 | 0 | } |
339 | 0 | } |
340 | 0 | } |
341 | 0 | } |
342 | 0 | for(i=0; i<subpel_quality; i++){ |
343 | 0 | nx= best_pos[i][0]; |
344 | 0 | ny= best_pos[i][1]; |
345 | 0 | CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2) |
346 | 0 | } |
347 | |
|
348 | 0 | av_assert2(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4); |
349 | |
|
350 | 0 | *mx_ptr = bx; |
351 | 0 | *my_ptr = by; |
352 | 0 | }else{ |
353 | 0 | *mx_ptr =4*mx; |
354 | 0 | *my_ptr =4*my; |
355 | 0 | } |
356 | |
|
357 | 0 | return dmin; |
358 | 0 | } |
359 | | |
360 | | |
/*
 * Evaluate the full-pel vector (x, y) unless the map already holds it for
 * the current generation. A fresh evaluation stores the raw score in
 * score_map, then adds the vector penalty and updates dmin/best[] when the
 * total is strictly lower than dmin.
 * Expects d, dmin, best, map, score_map, map_generation, shift, mv_penalty,
 * pred_x, pred_y, penalty_factor, xmin/xmax/ymin/ymax and the cmp()
 * arguments in the enclosing scope.
 */
#define CHECK_MV(x,y)\
{\
    const unsigned mv_key  = ((unsigned)(y) << ME_MAP_MV_BITS) + (x) + map_generation;\
    const int      mv_slot = (((unsigned)(y) << ME_MAP_SHIFT) + (x)) & (ME_MAP_SIZE - 1);\
    av_assert2((x) >= xmin);\
    av_assert2((x) <= xmax);\
    av_assert2((y) >= ymin);\
    av_assert2((y) <= ymax);\
    if (map[mv_slot] != mv_key) {\
        d = cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
        map[mv_slot]       = mv_key;\
        score_map[mv_slot] = d;\
        d += (mv_penalty[((x) * (1 << shift)) - pred_x] + mv_penalty[((y) * (1 << shift)) - pred_y]) * penalty_factor;\
        COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
    }\
}
377 | | |
/*
 * Clamp (ax, ay) into [xmin,xmax] x [ymin,ymax] and pass the clamped vector
 * to CHECK_MV. Each argument is evaluated once into a local first.
 */
#define CHECK_CLIPPED_MV(ax,ay)\
{\
    const int raw_mv_x     = ax;\
    const int raw_mv_y     = ay;\
    const int clipped_mv_x = FFMAX(xmin, FFMIN(raw_mv_x, xmax));\
    const int clipped_mv_y = FFMAX(ymin, FFMIN(raw_mv_y, ymax));\
    CHECK_MV(clipped_mv_x, clipped_mv_y)\
}
386 | | |
/*
 * Like CHECK_MV but without the range assertions; when the candidate
 * becomes the new best it additionally records new_dir in next_dir.
 * Expects the same enclosing-scope names as CHECK_MV plus next_dir.
 */
#define CHECK_MV_DIR(x,y,new_dir)\
{\
    const unsigned dir_key  = ((unsigned)(y) << ME_MAP_MV_BITS) + (x) + map_generation;\
    const int      dir_slot = (((unsigned)(y) << ME_MAP_SHIFT) + (x)) & (ME_MAP_SIZE - 1);\
    if (map[dir_slot] != dir_key) {\
        d = cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
        map[dir_slot]       = dir_key;\
        score_map[dir_slot] = d;\
        d += (mv_penalty[(int)((unsigned)(x) << shift) - pred_x] + mv_penalty[(int)((unsigned)(y) << shift) - pred_y]) * penalty_factor;\
        if (d < dmin) {\
            best[0]  = x;\
            best[1]  = y;\
            dmin     = d;\
            next_dir = new_dir;\
        }\
    }\
}
404 | | |
/*
 * Debug helper: logs an error through av_log() for each bound of
 * [xmin<<S, xmax<<S] x [ymin<<S, ymax<<S] that (x, y) violates. `v` is
 * stringified and appended to the message as a call-site tag.
 * Expands to four unbraced if statements and expects xmin/xmax/ymin/ymax
 * and `s` in the enclosing scope. The uses visible in this file are all
 * commented out.
 */
#define check(x,y,S,v)\
if( (x)<(xmin<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d xmin" #v, xmin, (x), (y), s->c.mb_x, s->c.mb_y);\
if( (x)>(xmax<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d xmax" #v, xmax, (x), (y), s->c.mb_x, s->c.mb_y);\
if( (y)<(ymin<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d ymin" #v, ymin, (x), (y), s->c.mb_x, s->c.mb_y);\
if( (y)>(ymax<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d ymax" #v, ymax, (x), (y), s->c.mb_x, s->c.mb_y);\
410 | | |
/*
 * Declares, in the enclosing scope, the extra locals used by the full-pel
 * search macros: map (from c->map), qpel (flags & FLAG_QPEL) and
 * shift (1 + qpel), which CHECK_MV and friends use to scale a full-pel
 * coordinate before indexing mv_penalty.
 * Variables named `c` and `flags` must be in scope.
 */
#define LOAD_COMMON2\
    uint32_t *map= c->map;\
    const int qpel= flags&FLAG_QPEL;\
    const int shift= 1+qpel;\
415 | | |
416 | | static av_always_inline int small_diamond_search(MPVEncContext *const s, int *best, int dmin, |
417 | | int src_index, int ref_index, const int penalty_factor, |
418 | | int size, int h, int flags) |
419 | 654k | { |
420 | 654k | MotionEstContext *const c = &s->me; |
421 | 654k | me_cmp_func cmpf, chroma_cmpf; |
422 | 654k | int next_dir=-1; |
423 | 654k | LOAD_COMMON |
424 | 654k | LOAD_COMMON2 |
425 | 654k | unsigned map_generation = c->map_generation; |
426 | | |
427 | 654k | cmpf = c->me_cmp[size]; |
428 | 654k | chroma_cmpf = c->me_cmp[size + 1]; |
429 | | |
430 | 654k | { /* ensure that the best point is in the MAP as h/qpel refinement needs it */ |
431 | 654k | const unsigned key = ((unsigned)best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation; |
432 | 654k | const int index= (((unsigned)best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1); |
433 | 654k | if (map[index] != key) { // this will be executed only very rarely |
434 | 26.8k | score_map[index]= cmp(s, best[0], best[1], 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags); |
435 | 26.8k | map[index]= key; |
436 | 26.8k | } |
437 | 654k | } |
438 | | |
439 | 2.41M | for(;;){ |
440 | 2.41M | int d; |
441 | 2.41M | const int dir= next_dir; |
442 | 2.41M | const int x= best[0]; |
443 | 2.41M | const int y= best[1]; |
444 | 2.41M | next_dir=-1; |
445 | | |
446 | 2.41M | if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y , 0) |
447 | 2.41M | if(dir!=3 && y>ymin) CHECK_MV_DIR(x , y-1, 1) |
448 | 2.41M | if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y , 2) |
449 | 2.41M | if(dir!=1 && y<ymax) CHECK_MV_DIR(x , y+1, 3) |
450 | | |
451 | 2.41M | if(next_dir==-1){ |
452 | 654k | return dmin; |
453 | 654k | } |
454 | 2.41M | } |
455 | 654k | } |
456 | | |
457 | | static int funny_diamond_search(MPVEncContext *const s, int *best, int dmin, |
458 | | int src_index, int ref_index, const int penalty_factor, |
459 | | int size, int h, int flags) |
460 | 0 | { |
461 | 0 | MotionEstContext *const c = &s->me; |
462 | 0 | me_cmp_func cmpf, chroma_cmpf; |
463 | 0 | int dia_size; |
464 | 0 | LOAD_COMMON |
465 | 0 | LOAD_COMMON2 |
466 | 0 | unsigned map_generation = c->map_generation; |
467 | |
|
468 | 0 | cmpf = c->me_cmp[size]; |
469 | 0 | chroma_cmpf = c->me_cmp[size + 1]; |
470 | |
|
471 | 0 | for(dia_size=1; dia_size<=4; dia_size++){ |
472 | 0 | int dir; |
473 | 0 | const int x= best[0]; |
474 | 0 | const int y= best[1]; |
475 | |
|
476 | 0 | if(dia_size&(dia_size-1)) continue; |
477 | | |
478 | 0 | if( x + dia_size > xmax |
479 | 0 | || x - dia_size < xmin |
480 | 0 | || y + dia_size > ymax |
481 | 0 | || y - dia_size < ymin) |
482 | 0 | continue; |
483 | | |
484 | 0 | for(dir= 0; dir<dia_size; dir+=2){ |
485 | 0 | int d; |
486 | |
|
487 | 0 | CHECK_MV(x + dir , y + dia_size - dir); |
488 | 0 | CHECK_MV(x + dia_size - dir, y - dir ); |
489 | 0 | CHECK_MV(x - dir , y - dia_size + dir); |
490 | 0 | CHECK_MV(x - dia_size + dir, y + dir ); |
491 | 0 | } |
492 | |
|
493 | 0 | if(x!=best[0] || y!=best[1]) |
494 | 0 | dia_size=0; |
495 | 0 | } |
496 | 0 | return dmin; |
497 | 0 | } |
498 | | |
499 | | static int hex_search(MPVEncContext *const s, int *best, int dmin, |
500 | | int src_index, int ref_index, const int penalty_factor, |
501 | | int size, int h, int flags, int dia_size) |
502 | 0 | { |
503 | 0 | MotionEstContext *const c = &s->me; |
504 | 0 | me_cmp_func cmpf, chroma_cmpf; |
505 | 0 | LOAD_COMMON |
506 | 0 | LOAD_COMMON2 |
507 | 0 | unsigned map_generation = c->map_generation; |
508 | 0 | int x,y,d; |
509 | 0 | const int dec= dia_size & (dia_size-1); |
510 | |
|
511 | 0 | cmpf = c->me_cmp[size]; |
512 | 0 | chroma_cmpf = c->me_cmp[size + 1]; |
513 | |
|
514 | 0 | for(;dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){ |
515 | 0 | do{ |
516 | 0 | x= best[0]; |
517 | 0 | y= best[1]; |
518 | |
|
519 | 0 | CHECK_CLIPPED_MV(x -dia_size , y); |
520 | 0 | CHECK_CLIPPED_MV(x+ dia_size , y); |
521 | 0 | CHECK_CLIPPED_MV(x+( dia_size>>1), y+dia_size); |
522 | 0 | CHECK_CLIPPED_MV(x+( dia_size>>1), y-dia_size); |
523 | 0 | if(dia_size>1){ |
524 | 0 | CHECK_CLIPPED_MV(x+(-dia_size>>1), y+dia_size); |
525 | 0 | CHECK_CLIPPED_MV(x+(-dia_size>>1), y-dia_size); |
526 | 0 | } |
527 | 0 | }while(best[0] != x || best[1] != y); |
528 | 0 | } |
529 | |
|
530 | 0 | return dmin; |
531 | 0 | } |
532 | | |
533 | | static int l2s_dia_search(MPVEncContext *const s, int *best, int dmin, |
534 | | int src_index, int ref_index, const int penalty_factor, |
535 | | int size, int h, int flags) |
536 | 0 | { |
537 | 0 | MotionEstContext *const c = &s->me; |
538 | 0 | me_cmp_func cmpf, chroma_cmpf; |
539 | 0 | LOAD_COMMON |
540 | 0 | LOAD_COMMON2 |
541 | 0 | unsigned map_generation = c->map_generation; |
542 | 0 | int x,y,i,d; |
543 | 0 | int dia_size= c->dia_size&0xFF; |
544 | 0 | const int dec= dia_size & (dia_size-1); |
545 | 0 | static const int hex[8][2]={{-2, 0}, {-1,-1}, { 0,-2}, { 1,-1}, |
546 | 0 | { 2, 0}, { 1, 1}, { 0, 2}, {-1, 1}}; |
547 | |
|
548 | 0 | cmpf = c->me_cmp[size]; |
549 | 0 | chroma_cmpf = c->me_cmp[size + 1]; |
550 | |
|
551 | 0 | for(; dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){ |
552 | 0 | do{ |
553 | 0 | x= best[0]; |
554 | 0 | y= best[1]; |
555 | 0 | for(i=0; i<8; i++){ |
556 | 0 | CHECK_CLIPPED_MV(x+hex[i][0]*dia_size, y+hex[i][1]*dia_size); |
557 | 0 | } |
558 | 0 | }while(best[0] != x || best[1] != y); |
559 | 0 | } |
560 | |
|
561 | 0 | x= best[0]; |
562 | 0 | y= best[1]; |
563 | 0 | CHECK_CLIPPED_MV(x+1, y); |
564 | 0 | CHECK_CLIPPED_MV(x, y+1); |
565 | 0 | CHECK_CLIPPED_MV(x-1, y); |
566 | 0 | CHECK_CLIPPED_MV(x, y-1); |
567 | |
|
568 | 0 | return dmin; |
569 | 0 | } |
570 | | |
571 | | static int umh_search(MPVEncContext *const s, int *best, int dmin, |
572 | | int src_index, int ref_index, const int penalty_factor, |
573 | | int size, int h, int flags) |
574 | 0 | { |
575 | 0 | MotionEstContext *const c = &s->me; |
576 | 0 | me_cmp_func cmpf, chroma_cmpf; |
577 | 0 | LOAD_COMMON |
578 | 0 | LOAD_COMMON2 |
579 | 0 | unsigned map_generation = c->map_generation; |
580 | 0 | int x,y,x2,y2, i, j, d; |
581 | 0 | const int dia_size= c->dia_size&0xFE; |
582 | 0 | static const int hex[16][2]={{-4,-2}, {-4,-1}, {-4, 0}, {-4, 1}, {-4, 2}, |
583 | 0 | { 4,-2}, { 4,-1}, { 4, 0}, { 4, 1}, { 4, 2}, |
584 | 0 | {-2, 3}, { 0, 4}, { 2, 3}, |
585 | 0 | {-2,-3}, { 0,-4}, { 2,-3},}; |
586 | |
|
587 | 0 | cmpf = c->me_cmp[size]; |
588 | 0 | chroma_cmpf = c->me_cmp[size + 1]; |
589 | |
|
590 | 0 | x= best[0]; |
591 | 0 | y= best[1]; |
592 | 0 | for(x2=FFMAX(x-dia_size+1, xmin); x2<=FFMIN(x+dia_size-1,xmax); x2+=2){ |
593 | 0 | CHECK_MV(x2, y); |
594 | 0 | } |
595 | 0 | for(y2=FFMAX(y-dia_size/2+1, ymin); y2<=FFMIN(y+dia_size/2-1,ymax); y2+=2){ |
596 | 0 | CHECK_MV(x, y2); |
597 | 0 | } |
598 | |
|
599 | 0 | x= best[0]; |
600 | 0 | y= best[1]; |
601 | 0 | for(y2=FFMAX(y-2, ymin); y2<=FFMIN(y+2,ymax); y2++){ |
602 | 0 | for(x2=FFMAX(x-2, xmin); x2<=FFMIN(x+2,xmax); x2++){ |
603 | 0 | CHECK_MV(x2, y2); |
604 | 0 | } |
605 | 0 | } |
606 | | |
607 | | //FIXME prevent the CLIP stuff |
608 | |
|
609 | 0 | for(j=1; j<=dia_size/4; j++){ |
610 | 0 | for(i=0; i<16; i++){ |
611 | 0 | CHECK_CLIPPED_MV(x+hex[i][0]*j, y+hex[i][1]*j); |
612 | 0 | } |
613 | 0 | } |
614 | |
|
615 | 0 | return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 2); |
616 | 0 | } |
617 | | |
618 | | static int full_search(MPVEncContext *const s, int *best, int dmin, |
619 | | int src_index, int ref_index, const int penalty_factor, |
620 | | int size, int h, int flags) |
621 | 0 | { |
622 | 0 | MotionEstContext *const c = &s->me; |
623 | 0 | me_cmp_func cmpf, chroma_cmpf; |
624 | 0 | LOAD_COMMON |
625 | 0 | LOAD_COMMON2 |
626 | 0 | unsigned map_generation = c->map_generation; |
627 | 0 | int x,y, d; |
628 | 0 | const int dia_size= c->dia_size&0xFF; |
629 | |
|
630 | 0 | cmpf = c->me_cmp[size]; |
631 | 0 | chroma_cmpf = c->me_cmp[size + 1]; |
632 | |
|
633 | 0 | for(y=FFMAX(-dia_size, ymin); y<=FFMIN(dia_size,ymax); y++){ |
634 | 0 | for(x=FFMAX(-dia_size, xmin); x<=FFMIN(dia_size,xmax); x++){ |
635 | 0 | CHECK_MV(x, y); |
636 | 0 | } |
637 | 0 | } |
638 | |
|
639 | 0 | x= best[0]; |
640 | 0 | y= best[1]; |
641 | 0 | d= dmin; |
642 | 0 | CHECK_CLIPPED_MV(x , y); |
643 | 0 | CHECK_CLIPPED_MV(x+1, y); |
644 | 0 | CHECK_CLIPPED_MV(x, y+1); |
645 | 0 | CHECK_CLIPPED_MV(x-1, y); |
646 | 0 | CHECK_CLIPPED_MV(x, y-1); |
647 | 0 | best[0]= x; |
648 | 0 | best[1]= y; |
649 | |
|
650 | 0 | return d; |
651 | 0 | } |
652 | | |
/*
 * Evaluate the full-pel vector (ax, ay) unless the map already holds it for
 * the current generation. If the penalised score is lower than the last
 * entry of minima[], the vector is inserted at its sorted position (the
 * last entry is dropped), `i` is set to -1 and `continue` is executed, so
 * the enclosing loop restarts its scan.
 * Must be expanded directly inside a loop indexed by `i`. Expects d,
 * minima, minima_count, map, score_map, map_generation, shift, mv_penalty,
 * pred_x, pred_y, penalty_factor and the cmp() arguments in scope.
 *
 * Coordinates may be negative: the map key/index are therefore built from
 * unsigned shifts and the penalty index from a multiplication, matching
 * CHECK_MV, instead of left-shifting a signed value.
 */
#define SAB_CHECK_MV(ax,ay)\
{\
    const unsigned key = ((unsigned)(ay)<<ME_MAP_MV_BITS) + (ax) + map_generation;\
    const int index= (((unsigned)(ay)<<ME_MAP_SHIFT) + (ax))&(ME_MAP_SIZE-1);\
    if(map[index]!=key){\
        d= cmp(s, ax, ay, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
        map[index]= key;\
        score_map[index]= d;\
        d += (mv_penalty[((ax)*(1<<shift))-pred_x] + mv_penalty[((ay)*(1<<shift))-pred_y])*penalty_factor;\
        if(d < minima[minima_count-1].height){\
            int j=0;\
\
            while(d >= minima[j].height) j++;\
\
            memmove(&minima [j+1], &minima [j], (minima_count - j - 1)*sizeof(Minima));\
\
            minima[j].checked= 0;\
            minima[j].height= d;\
            minima[j].x= (ax);\
            minima[j].y= (ay);\
\
            i=-1;\
            continue;\
        }\
    }\
}
679 | | |
680 | 0 | #define MAX_SAB_SIZE ME_MAP_SIZE |
681 | | static int sab_diamond_search(MPVEncContext *const s, int *best, int dmin, |
682 | | int src_index, int ref_index, const int penalty_factor, |
683 | | int size, int h, int flags) |
684 | 0 | { |
685 | 0 | MotionEstContext *const c = &s->me; |
686 | 0 | me_cmp_func cmpf, chroma_cmpf; |
687 | 0 | Minima minima[MAX_SAB_SIZE]; |
688 | 0 | const int minima_count= FFABS(c->dia_size); |
689 | 0 | int i, j; |
690 | 0 | LOAD_COMMON |
691 | 0 | LOAD_COMMON2 |
692 | 0 | unsigned map_generation = c->map_generation; |
693 | |
|
694 | 0 | av_assert1(minima_count <= MAX_SAB_SIZE); |
695 | |
|
696 | 0 | cmpf = c->me_cmp[size]; |
697 | 0 | chroma_cmpf = c->me_cmp[size + 1]; |
698 | | |
699 | | /*Note j<MAX_SAB_SIZE is needed if MAX_SAB_SIZE < ME_MAP_SIZE as j can |
700 | | become larger due to MVs overflowing their ME_MAP_MV_BITS bits space in map |
701 | | */ |
702 | 0 | for(j=i=0; i<ME_MAP_SIZE && j<MAX_SAB_SIZE; i++){ |
703 | 0 | uint32_t key= map[i]; |
704 | |
|
705 | 0 | key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1)); |
706 | |
|
707 | 0 | if ((key & (-(1 << (2 * ME_MAP_MV_BITS)))) != map_generation) |
708 | 0 | continue; |
709 | | |
710 | 0 | minima[j].height= score_map[i]; |
711 | 0 | minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS; |
712 | 0 | minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1); |
713 | 0 | minima[j].x-= (1<<(ME_MAP_MV_BITS-1)); |
714 | 0 | minima[j].y-= (1<<(ME_MAP_MV_BITS-1)); |
715 | | |
716 | | // all entries in map should be in range except if the mv overflows their ME_MAP_MV_BITS bits space |
717 | 0 | if( minima[j].x > xmax || minima[j].x < xmin |
718 | 0 | || minima[j].y > ymax || minima[j].y < ymin) |
719 | 0 | continue; |
720 | | |
721 | 0 | minima[j].checked=0; |
722 | 0 | if(minima[j].x || minima[j].y) |
723 | 0 | minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor; |
724 | |
|
725 | 0 | j++; |
726 | 0 | } |
727 | |
|
728 | 0 | AV_QSORT(minima, j, Minima, minima_cmp); |
729 | |
|
730 | 0 | for(; j<minima_count; j++){ |
731 | 0 | minima[j].height=256*256*256*64; |
732 | 0 | minima[j].checked=0; |
733 | 0 | minima[j].x= minima[j].y=0; |
734 | 0 | } |
735 | |
|
736 | 0 | for(i=0; i<minima_count; i++){ |
737 | 0 | const int x= minima[i].x; |
738 | 0 | const int y= minima[i].y; |
739 | 0 | int d; |
740 | |
|
741 | 0 | if(minima[i].checked) continue; |
742 | | |
743 | 0 | if( x >= xmax || x <= xmin |
744 | 0 | || y >= ymax || y <= ymin) |
745 | 0 | continue; |
746 | | |
747 | 0 | SAB_CHECK_MV(x-1, y) |
748 | 0 | SAB_CHECK_MV(x+1, y) |
749 | 0 | SAB_CHECK_MV(x , y-1) |
750 | 0 | SAB_CHECK_MV(x , y+1) |
751 | | |
752 | 0 | minima[i].checked= 1; |
753 | 0 | } |
754 | |
|
755 | 0 | best[0]= minima[0].x; |
756 | 0 | best[1]= minima[0].y; |
757 | 0 | dmin= minima[0].height; |
758 | |
|
759 | 0 | if( best[0] < xmax && best[0] > xmin |
760 | 0 | && best[1] < ymax && best[1] > ymin){ |
761 | 0 | int d; |
762 | | // ensure that the reference samples for hpel refinement are in the map |
763 | 0 | CHECK_MV(best[0]-1, best[1]) |
764 | 0 | CHECK_MV(best[0]+1, best[1]) |
765 | 0 | CHECK_MV(best[0], best[1]-1) |
766 | 0 | CHECK_MV(best[0], best[1]+1) |
767 | 0 | } |
768 | 0 | return dmin; |
769 | 0 | } |
770 | | |
771 | | static int var_diamond_search(MPVEncContext *const s, int *best, int dmin, |
772 | | int src_index, int ref_index, const int penalty_factor, |
773 | | int size, int h, int flags) |
774 | 0 | { |
775 | 0 | MotionEstContext *const c = &s->me; |
776 | 0 | me_cmp_func cmpf, chroma_cmpf; |
777 | 0 | int dia_size; |
778 | 0 | LOAD_COMMON |
779 | 0 | LOAD_COMMON2 |
780 | 0 | unsigned map_generation = c->map_generation; |
781 | |
|
782 | 0 | cmpf = c->me_cmp[size]; |
783 | 0 | chroma_cmpf = c->me_cmp[size + 1]; |
784 | |
|
785 | 0 | for(dia_size=1; dia_size<=c->dia_size; dia_size++){ |
786 | 0 | int dir, start, end; |
787 | 0 | const int x= best[0]; |
788 | 0 | const int y= best[1]; |
789 | |
|
790 | 0 | start= FFMAX(0, y + dia_size - ymax); |
791 | 0 | end = FFMIN(dia_size, xmax - x + 1); |
792 | 0 | for(dir= start; dir<end; dir++){ |
793 | 0 | int d; |
794 | | |
795 | | //check(x + dir,y + dia_size - dir,0, a0) |
796 | 0 | CHECK_MV(x + dir , y + dia_size - dir); |
797 | 0 | } |
798 | |
|
799 | 0 | start= FFMAX(0, x + dia_size - xmax); |
800 | 0 | end = FFMIN(dia_size, y - ymin + 1); |
801 | 0 | for(dir= start; dir<end; dir++){ |
802 | 0 | int d; |
803 | | |
804 | | //check(x + dia_size - dir, y - dir,0, a1) |
805 | 0 | CHECK_MV(x + dia_size - dir, y - dir ); |
806 | 0 | } |
807 | |
|
808 | 0 | start= FFMAX(0, -y + dia_size + ymin ); |
809 | 0 | end = FFMIN(dia_size, x - xmin + 1); |
810 | 0 | for(dir= start; dir<end; dir++){ |
811 | 0 | int d; |
812 | | |
813 | | //check(x - dir,y - dia_size + dir,0, a2) |
814 | 0 | CHECK_MV(x - dir , y - dia_size + dir); |
815 | 0 | } |
816 | |
|
817 | 0 | start= FFMAX(0, -x + dia_size + xmin ); |
818 | 0 | end = FFMIN(dia_size, ymax - y + 1); |
819 | 0 | for(dir= start; dir<end; dir++){ |
820 | 0 | int d; |
821 | | |
822 | | //check(x - dia_size + dir, y + dir,0, a3) |
823 | 0 | CHECK_MV(x - dia_size + dir, y + dir ); |
824 | 0 | } |
825 | |
|
826 | 0 | if(x!=best[0] || y!=best[1]) |
827 | 0 | dia_size=0; |
828 | 0 | } |
829 | 0 | return dmin; |
830 | 0 | } |
831 | | |
/*
 * Select a search routine according to c->dia_size and forward all
 * arguments to it unchanged; the routine's return value is returned as is.
 *
 *   dia_size == -1        -> funny_diamond_search()
 *   dia_size <  -1        -> sab_diamond_search()
 *   dia_size 0 or 1       -> small_diamond_search()
 *   dia_size >  1024      -> full_search()
 *   dia_size 769 .. 1024  -> umh_search()
 *   dia_size 513 .. 768   -> hex_search(), with (dia_size & 0xFF) as an
 *                            additional last argument
 *   dia_size 257 .. 512   -> l2s_dia_search()
 *   dia_size 2 .. 256     -> var_diamond_search()
 *
 * The tests are evaluated top to bottom, so their order defines the ranges
 * listed above.
 */
832 | | static av_always_inline int diamond_search(MPVEncContext *const s, int *best, int dmin,
833 | | int src_index, int ref_index, const int penalty_factor,
834 | 654k | int size, int h, int flags){
835 | 654k | MotionEstContext *const c = &s->me;
836 | 654k | if(c->dia_size==-1)
837 | 0 | return funny_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
838 | 654k | else if(c->dia_size<-1)
839 | 0 | return sab_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
840 | 654k | else if(c->dia_size<2)
841 | 654k | return small_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
842 | 0 | else if(c->dia_size>1024)
843 | 0 | return full_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
844 | 0 | else if(c->dia_size>768)
845 | 0 | return umh_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
846 | 0 | else if(c->dia_size>512)
/* only the low 8 bits of dia_size are handed to hex_search() */
847 | 0 | return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, c->dia_size&0xFF);
848 | 0 | else if(c->dia_size>256)
849 | 0 | return l2s_dia_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
850 | 0 | else
851 | 0 | return var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
852 | 654k | }
853 | | |
/*
 * Motion search seeded with predictor candidates and refined by
 * diamond_search().
 *
 * Order of work:
 *   1. score candidate (0,0);
 *   2. try predictor candidates taken from P[] (via the P_* macros) and from
 *      last_mv[], the latter scaled as (v * ref_mv_scale + (1<<15)) >> 16;
 *   3. if the best score is still above h*h*4, try further last_mv[]
 *      neighbours;
 *   4. if c->avctx->last_predictor_count is non-zero, scan a window of
 *      last_mv[] entries around the current macroblock;
 *   5. hand the best candidate to diamond_search().
 *
 * The best vector is written to *mx_ptr / *my_ptr and its score is returned.
 * On the early-out path in step 2 the vector is (0,0) and c->skip is set to 1.
 * When c->pre_pass is set, pre_penalty_factor and me_pre_cmp[] replace
 * penalty_factor and me_cmp[].
 *
 * NOTE(review): CHECK_MV, CHECK_CLIPPED_MV, LOAD_COMMON2 and the P_* names
 * are macros defined outside this part of the file; they presumably update
 * d, dmin and best[] in place -- confirm against their definitions.
 */
854 | | /**
855 | | @param P a list of candidate mvs to check before starting the
856 | | iterative search. If one of the candidates is close to the optimal mv, then
857 | | it takes fewer iterations. And it increases the chance that we find the
858 | | optimal mv.
859 | | */
860 | | static av_always_inline int epzs_motion_search_internal(MPVEncContext *const s, int *mx_ptr, int *my_ptr,
861 | | int P[10][2], int src_index, int ref_index, const int16_t (*last_mv)[2],
862 | | int ref_mv_scale, int flags, int size, int h)
863 | 711k | {
864 | 711k | MotionEstContext *const c = &s->me;
865 | 711k | int best[2]={0, 0}; /**< x and y coordinates of the best motion vector.
866 | | i.e. the difference between the position of the
867 | | block currently being encoded and the position of
868 | | the block chosen to predict it from. */
869 | 711k | int d; ///< the score (cmp + penalty) of any given mv
870 | 711k | int dmin; /**< the best value of d, i.e. the score
871 | | corresponding to the mv stored in best[]. */
872 | 711k | unsigned map_generation;
873 | 711k | int penalty_factor;
874 | 711k | const int ref_mv_stride= s->c.mb_stride; //pass as arg FIXME
/* index of the current macroblock's entry in last_mv[] */
875 | 711k | const int ref_mv_xy = s->c.mb_x + s->c.mb_y * ref_mv_stride; // add to last_mv before passing FIXME
876 | 711k | me_cmp_func cmpf, chroma_cmpf;
877 | |
878 | 711k | LOAD_COMMON
879 | 711k | LOAD_COMMON2
880 | |
/* the pre-pass has its own penalty factor and compare functions */
881 | 711k | if(c->pre_pass){
882 | 0 | penalty_factor= c->pre_penalty_factor;
883 | 0 | cmpf = c->me_pre_cmp[size];
884 | 0 | chroma_cmpf = c->me_pre_cmp[size + 1];
885 | 711k | }else{
886 | 711k | penalty_factor= c->penalty_factor;
887 | 711k | cmpf = c->me_cmp[size];
888 | 711k | chroma_cmpf = c->me_cmp[size + 1];
889 | 711k | }
890 | |
891 | 711k | map_generation= update_map_generation(c);
892 | |
893 | 711k | av_assert2(cmpf);
/* score candidate (0,0) and record it in slot 0 of map[] / score_map[] */
894 | 711k | dmin= cmp(s, 0, 0, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
895 | 711k | map[0]= map_generation;
896 | 711k | score_map[0]= dmin;
897 | |
/* For B pictures without FLAG_DIRECT, or whenever FF_MPV_FLAG_MV0 is set,
 * (mv_penalty[pred_x] + mv_penalty[pred_y]) * penalty_factor is added to the
 * (0,0) score; in all other cases the (0,0) score carries no mv penalty. */
898 | | //FIXME precalc first term below?
899 | 711k | if ((s->c.pict_type == AV_PICTURE_TYPE_B && !(c->flags & FLAG_DIRECT)) ||
900 | 711k | s->mpv_flags & FF_MPV_FLAG_MV0)
901 | 0 | dmin += (mv_penalty[pred_x] + mv_penalty[pred_y])*penalty_factor;
902 | |
903 | | /* first line */
904 | 711k | if (s->c.first_slice_line) {
/* only the left predictor and the current macroblock's last_mv[] entry */
905 | 130k | CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
906 | 130k | CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
907 | 130k | (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
908 | 581k | }else{
/* Early out: if the (0,0) score is below (h*h*mv0_threshold) >> 8 and the
 * left, top and top-right predictors are all zero, report (0,0), set
 * c->skip and return without searching any further.
 * NOTE(review): h * h * mv0_threshold is evaluated in int; a very large
 * mv0_threshold would overflow -- confirm the permitted range of the option. */
909 | 581k | if (dmin < ((h * h * c->avctx->mv0_threshold) >> 8)
910 | 75.6k | && ( P_LEFT[0] |P_LEFT[1]
911 | 75.6k | |P_TOP[0] |P_TOP[1]
912 | 75.6k | |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){
913 | 57.5k | *mx_ptr= 0;
914 | 57.5k | *my_ptr= 0;
915 | 57.5k | c->skip=1;
916 | 57.5k | return dmin;
917 | 57.5k | }
/* Candidates, in this order: the median predictor, its four direct
 * neighbours (y-1, y+1, x-1, x+1), the current macroblock's last_mv[] entry,
 * then the left, top and top-right predictors. */
918 | 523k | CHECK_MV( P_MEDIAN[0] >>shift , P_MEDIAN[1] >>shift)
919 | 523k | CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift) , (P_MEDIAN[1]>>shift)-1)
920 | 523k | CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift) , (P_MEDIAN[1]>>shift)+1)
921 | 523k | CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)-1, (P_MEDIAN[1]>>shift) )
922 | 523k | CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)+1, (P_MEDIAN[1]>>shift) )
923 | 523k | CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
924 | 523k | (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
925 | 523k | CHECK_MV(P_LEFT[0] >>shift, P_LEFT[1] >>shift)
926 | 523k | CHECK_MV(P_TOP[0] >>shift, P_TOP[1] >>shift)
927 | 523k | CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
928 | 523k | }
/* Best score still above h*h*4: try more last_mv[] neighbours.
 * Pre-pass: the previous entry (xy-1) and, unless on the first slice line,
 * the entry one row up (xy-stride).
 * Otherwise: the next entry (xy+1) and, unless mb_y+1 reaches end_mb_y, the
 * entry one row down (xy+stride). */
929 | 654k | if(dmin>h*h*4){
930 | 501k | if(c->pre_pass){
931 | 0 | CHECK_CLIPPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16,
932 | 0 | (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16)
933 | 0 | if(!s->c.first_slice_line)
934 | 0 | CHECK_CLIPPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
935 | 0 | (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
936 | 501k | }else{
937 | 501k | CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
938 | 501k | (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
939 | 501k | if(s->c.mb_y+1<s->c.end_mb_y) //FIXME replace at least with last_slice_line
940 | 0 | CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
941 | 501k | (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
942 | 501k | }
943 | 501k | }
944 | |
/* Optional scan of last_mv[] over the macroblock window
 * [mb_x-count, mb_x+count] x [mb_y-count, mb_y+count], limited to
 * 0..mb_width-1 and 0..mb_height-1. Scaled vectors that fall outside
 * [xmin,xmax] x [ymin,ymax] are skipped, not clipped. */
945 | 654k | if(c->avctx->last_predictor_count){
946 | 0 | const int count= c->avctx->last_predictor_count;
947 | 0 | const int xstart= FFMAX(0, s->c.mb_x - count);
948 | 0 | const int ystart= FFMAX(0, s->c.mb_y - count);
949 | 0 | const int xend= FFMIN(s->c.mb_width , s->c.mb_x + count + 1);
950 | 0 | const int yend= FFMIN(s->c.mb_height, s->c.mb_y + count + 1);
951 | 0 | int mb_y;
952 | |

953 | 0 | for(mb_y=ystart; mb_y<yend; mb_y++){
954 | 0 | int mb_x;
955 | 0 | for(mb_x=xstart; mb_x<xend; mb_x++){
/* NOTE(review): this index adds one column and one row compared with the
 * mb_x + mb_y*stride form used for ref_mv_xy above -- confirm the offset is
 * intended for the table passed as last_mv. */
956 | 0 | const int xy= mb_x + 1 + (mb_y + 1)*ref_mv_stride;
957 | 0 | int mx= (last_mv[xy][0]*ref_mv_scale + (1<<15))>>16;
958 | 0 | int my= (last_mv[xy][1]*ref_mv_scale + (1<<15))>>16;
959 | |

960 | 0 | if(mx>xmax || mx<xmin || my>ymax || my<ymin) continue;
961 | 0 | CHECK_MV(mx,my)
962 | 0 | }
963 | 0 | }
964 | 0 | }
965 | |
/* refine the best candidate found so far; best[] and dmin are updated */
966 | | //check(best[0],best[1],0, b0)
967 | 654k | dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
968 | |
969 | | //check(best[0],best[1],0, b1)
970 | 654k | *mx_ptr= best[0];
971 | 654k | *my_ptr= best[1];
972 | |
973 | 654k | return dmin;
974 | 711k | }
975 | | |
/**
 * Non-static entry point for epzs_motion_search_internal().
 *
 * When c->flags == 0, h == 16 and size == 0, the worker is called with those
 * three values written as the literals 0, 0 and 16; otherwise c->flags, size
 * and h are forwarded. Both calls pass the same values, so the result does
 * not depend on which branch is taken. Presumably the literal form lets the
 * compiler specialise the always-inline worker for the common case.
 *
 * @return the value returned by epzs_motion_search_internal()
 */
976 | | //this function is dedicated to the brain damaged gcc
977 | | int ff_epzs_motion_search(MPVEncContext *const s, int *mx_ptr, int *my_ptr,
978 | | int P[10][2], int src_index, int ref_index,
979 | | const int16_t (*last_mv)[2], int ref_mv_scale,
980 | | int size, int h)
981 | 711k | {
982 | 711k | MotionEstContext *const c = &s->me;
983 | | //FIXME convert other functions in the same way if faster
984 | 711k | if(c->flags==0 && h==16 && size==0){
/* trailing arguments are flags = 0, size = 0, h = 16 */
985 | 711k | return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0, 0, 16);
986 | | // case FLAG_QPEL:
987 | | // return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, FLAG_QPEL);
988 | 711k | }else{
989 | 0 | return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, c->flags, size, h);
990 | 0 | }
991 | 711k | }
992 | | |
/*
 * Predictor-seeded motion search with a fixed block height of h = 8.
 *
 * Compared with epzs_motion_search_internal():
 *   - candidate (0,0) is not scored up front; dmin starts at 1000000;
 *   - P_MV1 is tried as an additional candidate;
 *   - c->penalty_factor, c->me_cmp[] and c->flags are always used (there is
 *     no pre-pass handling);
 *   - there is no early skip and no last_predictor_count scan.
 *
 * The best vector is written to *mx_ptr / *my_ptr and its score is returned.
 *
 * NOTE(review): CHECK_MV, CHECK_CLIPPED_MV, LOAD_COMMON2 and the P_* names
 * are macros defined outside this part of the file; they presumably update
 * d, dmin and best[] in place -- confirm against their definitions.
 */
993 | | static int epzs_motion_search2(MPVEncContext *const s,
994 | | int *mx_ptr, int *my_ptr, int P[10][2],
995 | | int src_index, int ref_index, const int16_t (*last_mv)[2],
996 | | int ref_mv_scale, const int size)
997 | 0 | {
998 | 0 | MotionEstContext *const c = &s->me;
999 | 0 | int best[2]={0, 0};
1000 | 0 | int d, dmin;
1001 | 0 | unsigned map_generation;
1002 | 0 | const int penalty_factor= c->penalty_factor;
1003 | 0 | const int h=8;
1004 | 0 | const int ref_mv_stride= s->c.mb_stride;
/* index of the current macroblock's entry in last_mv[] */
1005 | 0 | const int ref_mv_xy= s->c.mb_x + s->c.mb_y *ref_mv_stride;
1006 | 0 | me_cmp_func cmpf, chroma_cmpf;
1007 | 0 | LOAD_COMMON
/* presumably LOAD_COMMON2 reads `flags`, hence the declaration between the
 * two macros -- TODO confirm against the macro definition */
1008 | 0 | int flags= c->flags;
1009 | 0 | LOAD_COMMON2
1010 | |

1011 | 0 | cmpf = c->me_cmp[size];
1012 | 0 | chroma_cmpf = c->me_cmp[size + 1];
1013 | |

1014 | 0 | map_generation= update_map_generation(c);
1015 | |

/* large starting score: no candidate has been evaluated yet */
1016 | 0 | dmin = 1000000;
1017 | |
1018 | | /* first line */
1019 | 0 | if (s->c.first_slice_line) {
/* left predictor, the current macroblock's last_mv[] entry, then P_MV1 */
1020 | 0 | CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1021 | 0 | CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1022 | 0 | (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1023 | 0 | CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1024 | 0 | }else{
/* P_MV1 first, then median, left, top and top-right predictors, and last
 * the current macroblock's last_mv[] entry */
1025 | 0 | CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1026 | | //FIXME try some early stop
1027 | 0 | CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
1028 | 0 | CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1029 | 0 | CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
1030 | 0 | CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1031 | 0 | CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1032 | 0 | (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1033 | 0 | }
/* 64*4 equals h*h*4 for h = 8. Above that score, also try the next last_mv[]
 * entry (xy+1) and, unless mb_y+1 reaches end_mb_y, the entry one row down
 * (xy+stride). */
1034 | 0 | if(dmin>64*4){
1035 | 0 | CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1036 | 0 | (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1037 | 0 | if(s->c.mb_y+1<s->c.end_mb_y) //FIXME replace at least with last_slice_line
1038 | 0 | CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1039 | 0 | (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1040 | 0 | }
1041 | |

/* refine the best candidate found so far; best[] and dmin are updated */
1042 | 0 | dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1043 | |

1044 | 0 | *mx_ptr= best[0];
1045 | 0 | *my_ptr= best[1];
1046 | |

1047 | 0 | return dmin;
1048 | 0 | }